AI-Hypercomputer
diff --git a/‎.github/workflows/UnitTests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/UnitTests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements_with_jax_ai_image.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements_with_jax_ai_image.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/maxdiffusion/checkpointing/ltx2_checkpointer.py‎
Lines changed: 113 additions & 0 deletions b/‎src/maxdiffusion/checkpointing/ltx2_checkpointer.py‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎src/maxdiffusion/configs/base_wan_27b.yml‎
Lines changed: 4 additions & 1 deletion b/‎src/maxdiffusion/configs/base_wan_27b.yml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/maxdiffusion/configs/ltx2_video.yml‎
Lines changed: 30 additions & 23 deletions b/‎src/maxdiffusion/configs/ltx2_video.yml‎
Lines changed: 30 additions & 23 deletions
@@ -59,7 +59,7 @@ jobs:
     - name: PyTest
       run: | #--deselect=src/maxdiffusion/tests/input_pipeline_interface_test.py
         export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
-        HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ TOKENIZERS_PARALLELISM=false python3 -m pytest --deselect=src/maxdiffusion/tests/ltx_transformer_step_test.py -x
+        HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ TOKENIZERS_PARALLELISM=false python3 -m pytest --deselect=src/maxdiffusion/tests/ltx_transformer_step_test.py --ignore=src/maxdiffusion/kernels/splash_attention -x
 #  add_pull_ready:
 #    if: github.ref != 'refs/heads/main'
 #    permissions:
 
@@ -31,7 +31,7 @@ opencv-python-headless==4.10.0.84
 orbax-checkpoint
 tokenizers==0.21.0
 huggingface_hub>=0.30.2
-transformers==4.48.1
+transformers==4.51.0
 einops==0.8.0
 sentencepiece
 aqtp
 
@@ -30,7 +30,7 @@ opencv-python-headless==4.10.0.84
 orbax-checkpoint
 tokenizers==0.21.0
 huggingface_hub>=0.30.2
-transformers==4.48.1
+transformers==4.51.0
 tokamax
 einops==0.8.0
 sentencepiece
 
@@ -0,0 +1,113 @@
+"""
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import jax
+import numpy as np
+from typing import Optional, Tuple
+from maxdiffusion.pipelines.ltx2.ltx2_pipeline import LTX2Pipeline
+from maxdiffusion import max_logging
+from maxdiffusion.checkpointing.checkpointing_utils import create_orbax_checkpoint_manager
+import orbax.checkpoint as ocp
+from etils import epath
+
+LTX2_CHECKPOINT = "LTX2_CHECKPOINT"
+
+
+class LTX2Checkpointer:
+  """Saves and restores LTX2 checkpoints through an Orbax ``CheckpointManager``.
+
+  A checkpoint is written as two items: ``ltx2_state`` (the training-state
+  pytree passed to ``save_checkpoint``) and ``ltx2_config`` (the pipeline's
+  transformer config serialized as JSON).
+  """
+
+  def __init__(self, config, checkpoint_type: str = LTX2_CHECKPOINT):
+    """Creates the checkpoint manager for this run.
+
+    Args:
+      config: run configuration object. ``checkpoint_dir`` and ``dataset_type``
+        are read from it with ``getattr`` defaults of ``""`` and ``None``.
+      checkpoint_type: label forwarded to ``create_orbax_checkpoint_manager``.
+    """
+    self.config = config
+    self.checkpoint_type = checkpoint_type
+    # Not read or written anywhere else in this class.
+    self.opt_state = None
+
+    # The manager is always requested with checkpointing enabled and a save
+    # interval of one step.
+    self.checkpoint_manager: ocp.CheckpointManager = create_orbax_checkpoint_manager(
+        getattr(self.config, "checkpoint_dir", ""),
+        enable_checkpointing=True,
+        save_interval_steps=1,
+        checkpoint_type=checkpoint_type,
+        dataset_type=getattr(config, "dataset_type", None),
+    )
+
+  def load_ltx2_configs_from_orbax(self, step: Optional[int]) -> Tuple[Optional[dict], Optional[int]]:
+    """Restores the ``ltx2_state`` and ``ltx2_config`` items for one step.
+
+    Args:
+      step: checkpoint step to restore. When ``None``, the manager's
+        ``latest_step()`` is used.
+
+    Returns:
+      ``(restored_checkpoint, step)``, or ``(None, None)`` when there is no
+      checkpoint manager or no step could be resolved.
+      NOTE(review): the restored object is accessed by attribute
+      (``.ltx2_state``) below, so it is presumably an Orbax composite rather
+      than the plain ``dict`` the annotation names -- confirm.
+    """
+    if self.checkpoint_manager is None:
+      max_logging.log("No checkpoint manager configured, skipping Orbax load.")
+      return None, None
+
+    if step is None:
+      step = self.checkpoint_manager.latest_step()
+      max_logging.log(f"Latest LTX2 checkpoint step: {step}")
+      if step is None:
+        max_logging.log("No LTX2 checkpoint found.")
+        return None, None
+    max_logging.log(f"Loading LTX2 checkpoint from step {step}")
+    # Use the stored metadata of `ltx2_state` to build a tree of the same
+    # structure, then request every leaf to be restored as a numpy array.
+    metadatas = self.checkpoint_manager.item_metadata(step)
+    transformer_metadata = metadatas.ltx2_state
+    abstract_tree_structure_params = jax.tree_util.tree_map(ocp.utils.to_shape_dtype_struct, transformer_metadata)
+    params_restore = ocp.args.PyTreeRestore(
+        restore_args=jax.tree.map(
+            lambda _: ocp.RestoreArgs(restore_type=np.ndarray),
+            abstract_tree_structure_params,
+        )
+    )
+
+    max_logging.log("Restoring LTX2 checkpoint")
+    # NOTE(review): `checkpoint_dir` is read directly here, whereas __init__
+    # reads it with a getattr default -- confirm the attribute always exists.
+    restored_checkpoint = self.checkpoint_manager.restore(
+        directory=epath.Path(self.config.checkpoint_dir),
+        step=step,
+        args=ocp.args.Composite(
+            ltx2_state=params_restore,
+            ltx2_config=ocp.args.JsonRestore(),
+        ),
+    )
+    max_logging.log(f"restored checkpoint {restored_checkpoint.keys()}")
+    max_logging.log(f"restored checkpoint ltx2_state {restored_checkpoint.ltx2_state.keys()}")
+    max_logging.log(f"optimizer found in checkpoint {'opt_state' in restored_checkpoint.ltx2_state.keys()}")
+    return restored_checkpoint, step
+
+  def load_checkpoint(
+      self, step=None, vae_only=False, load_transformer=True
+  ) -> Tuple[LTX2Pipeline, Optional[dict], Optional[int]]:
+    """Builds an ``LTX2Pipeline`` from a checkpoint, or from pretrained weights.
+
+    Args:
+      step: checkpoint step to load; ``None`` selects the latest step.
+      vae_only: forwarded unchanged to the pipeline constructor.
+      load_transformer: forwarded unchanged to the pipeline constructor.
+
+    Returns:
+      ``(pipeline, opt_state, step)``. ``opt_state`` is the ``"opt_state"``
+      entry of the restored ``ltx2_state`` when present, otherwise ``None``.
+      ``step`` is ``None`` when no checkpoint was restored.
+    """
+    restored_checkpoint, step = self.load_ltx2_configs_from_orbax(step)
+    opt_state = None
+
+    if restored_checkpoint:
+      max_logging.log("Loading LTX2 pipeline from checkpoint")
+      pipeline = LTX2Pipeline.from_checkpoint(self.config, restored_checkpoint, vae_only, load_transformer)
+      if "opt_state" in restored_checkpoint.ltx2_state.keys():
+        opt_state = restored_checkpoint.ltx2_state["opt_state"]
+    else:
+      # Nothing was restored: fall back to LTX2Pipeline.from_pretrained.
+      max_logging.log("No checkpoint found, loading pipeline from pretrained hub")
+      pipeline = LTX2Pipeline.from_pretrained(self.config, vae_only, load_transformer)
+
+    return pipeline, opt_state, step
+
+  def save_checkpoint(self, train_step, pipeline: LTX2Pipeline, train_states: dict):
+    """Saves the training state and the transformer configuration.
+
+    Args:
+      train_step: step number under which the checkpoint is saved.
+      pipeline: pipeline whose ``transformer`` provides the JSON config.
+      train_states: pytree saved as the ``ltx2_state`` item.
+    """
+
+    def config_to_json(model_or_config):
+      # Round-trip through to_json_string() to get a JSON-compatible object.
+      return json.loads(model_or_config.to_json_string())
+
+    max_logging.log(f"Saving checkpoint for step {train_step}")
+    items = {
+        "ltx2_config": ocp.args.JsonSave(config_to_json(pipeline.transformer)),
+    }
+
+    items["ltx2_state"] = ocp.args.PyTreeSave(train_states)
+
+    # Save both items as one composite checkpoint for this step.
+    self.checkpoint_manager.save(train_step, args=ocp.args.Composite(**items))
+    max_logging.log(f"Checkpoint for step {train_step} saved.")
@@ -303,8 +303,11 @@ guidance_scale_high: 4.0
 # timestep to switch between low noise and high noise transformer
 boundary_ratio: 0.875
 
-# Diffusion CFG cache (FasterCache-style, WAN 2.1 T2V only)
+# Diffusion CFG cache (FasterCache-style)
 use_cfg_cache: False
+# SenCache: Sensitivity-Aware Caching (arXiv:2602.24208) — skip forward pass
+# when predicted output change (based on accumulated latent/timestep drift) is small
+use_sen_cache: False
 
 # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
 guidance_rescale: 0.0
 
@@ -4,63 +4,65 @@ skip_jax_distributed_system: False
 attention: 'flash'
 attention_sharding_uniform: True 
 precision: 'bf16'
-data_sharding: ['data', 'fsdp', 'context', 'tensor']
-remat_policy: "NONE"
+scan_layers: True
 names_which_can_be_saved: []
 names_which_can_be_offloaded: []
+remat_policy: "NONE"
 
 jax_cache_dir: ''
 weights_dtype: 'bfloat16'
 activations_dtype: 'bfloat16'
 
-run_name: ''
+run_name: 'ltx2_inference'
 output_dir: ''
 config_path: ''
 save_config_to_gcs: False
 
-frame_rate: 30
+#Checkpoints
 max_sequence_length: 1024
 sampler: "from_checkpoint"
 
 # Generation parameters
-dataset_name: ''
-dataset_save_location: ''
 global_batch_size_to_train_on: 1
 num_inference_steps: 40
 guidance_scale: 3.0
 fps: 24
-prompt: "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."
-negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
+pipeline_type: multi-scale
+prompt: "A man in a brightly lit room talks on a vintage telephone. In a low, heavy voice, he says, 'I understand. I won't call again. Goodbye.' He hangs up the receiver and looks down with a sad expression. He holds the black rotary phone to his right ear with his right hand, his left hand holding a rocks glass with amber liquid. He wears a brown suit jacket over a white shirt, and a gold ring on his left ring finger. His short hair is neatly combed, and he has light skin with visible wrinkles around his eyes. The camera remains stationary, focused on his face and upper body. The room is brightly lit by a warm light source off-screen to the left, casting shadows on the wall behind him. The scene appears to be from a dramatic movie."
+negative_prompt: "shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."
 height: 512
 width: 768
-num_frames: 121
 decode_timestep: 0.05
 decode_noise_scale: 0.025
+num_frames: 121
 quantization: "int8"
 seed: 10
 #parallelism
 mesh_axes: ['data', 'fsdp', 'context', 'tensor']
 logical_axis_rules: [
-                      ['batch', 'data'],
-                      ['activation_heads', 'fsdp'],
-                      ['activation_batch', 'data'],
-                      ['activation_kv', 'tensor'],
+                      ['batch', ['data', 'fsdp']],
+                      ['activation_batch', ['data', 'fsdp']],
+                      ['activation_self_attn_heads', ['context', 'tensor']],
+                      ['activation_cross_attn_q_length', ['context', 'tensor']],
+                      ['activation_length', 'context'],
+                      ['activation_heads', 'tensor'],
                       ['mlp','tensor'],
-                      ['embed','fsdp'],
+                      ['embed', ['context', 'fsdp']],
                       ['heads', 'tensor'],
-                      ['norm', 'fsdp'],
-                      ['conv_batch', ['data','fsdp']],
+                      ['norm', 'tensor'],
+                      ['conv_batch', ['data', 'context', 'fsdp']],
                       ['out_channels', 'tensor'],
-                      ['conv_out', 'fsdp'],
-                      ['conv_in', 'fsdp']
+                      ['conv_out', 'context'],
                     ]
-dcn_data_parallelism: 1
+data_sharding: ['data', 'fsdp', 'context', 'tensor']
+
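+dcn_data_parallelism: 1  # recommended DCN axis to be auto-sharded (NOTE(review): the -1 value is on dcn_fsdp_parallelism below, not on this axis - confirm which axis this note belongs to)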
 dcn_fsdp_parallelism: -1
 dcn_context_parallelism: 1
 dcn_tensor_parallelism: 1
 ici_data_parallelism: 1
-ici_fsdp_parallelism: -1
-ici_context_parallelism: 1
+ici_fsdp_parallelism: 1  
+ici_context_parallelism: -1 # recommended ICI axis to be auto-sharded
 ici_tensor_parallelism: 1
 enable_profiler: False
 
@@ -74,8 +76,11 @@ model_name: "ltx2_video"
 model_type: "T2V"
 unet_checkpoint: ''
 checkpoint_dir: ""
+dataset_name: ''
+train_split: 'train'
+dataset_type: 'tfrecord'
 cache_latents_text_encoder_outputs: True
-per_device_batch_size: 1
+per_device_batch_size: 0.125
 compile_topology_num_slices: -1 
 quantization_local_shard_count: -1
 use_qwix_quantization: False
@@ -84,4 +89,6 @@ act_quantization_calibration_method: "absmax"
 bwd_quantization_calibration_method: "absmax"
 qwix_module_path: ".*"
 jit_initializers: True 
-enable_single_replica_ckpt_restoring: False
+enable_single_replica_ckpt_restoring: False
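+seed: 0  # NOTE(review): `seed: 10` is already set earlier in this file, so this is a duplicate key - confirm which value is intended and drop the other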
+audio_format: "s16"