Commit a6a9fb4

add magcache support with calibration mode
1 parent cbf4b5e commit a6a9fb4

File tree

2 files changed: 279 additions, 88 deletions

src/diffusers/hooks/mag_cache.py

Lines changed: 121 additions & 54 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -30,7 +30,8 @@
 _MAG_CACHE_LEADER_BLOCK_HOOK = "mag_cache_leader_block_hook"
 _MAG_CACHE_BLOCK_HOOK = "mag_cache_block_hook"
 
-# Default Mag Ratios for Flux models (Dev/Schnell)
+# Default Mag Ratios for Flux models (Dev/Schnell) are provided for convenience.
+# Users must explicitly pass these to the config if using Flux.
 # Reference: https://github.com/Zehong-Ma/MagCache
 FLUX_MAG_RATIOS = np.array(
     [1.0]
@@ -97,38 +98,62 @@ class MagCacheConfig:
         num_inference_steps (`int`, defaults to `28`):
             The number of inference steps used in the pipeline. This is required to interpolate `mag_ratios` correctly.
         mag_ratios (`np.ndarray`, *optional*):
-            The pre-computed magnitude ratios for the model. If not provided, defaults to the Flux ratios.
+            The pre-computed magnitude ratios for the model. These are checkpoint-dependent.
+            If not provided, you must set `calibrate=True` to calculate them for your specific model.
+            For Flux models, you can use `diffusers.hooks.mag_cache.FLUX_MAG_RATIOS`.
+        calibrate (`bool`, defaults to `False`):
+            If True, enables calibration mode. In this mode, no blocks are skipped. Instead, the hook calculates
+            the magnitude ratios for the current run and logs them at the end. Use this to obtain `mag_ratios`
+            for new models or schedulers.
     """
 
     threshold: float = 0.24
     max_skip_steps: int = 5
     retention_ratio: float = 0.1
     num_inference_steps: int = 28
     mag_ratios: Optional[np.ndarray] = None
+    calibrate: bool = False
 
     def __post_init__(self):
-        if self.mag_ratios is None:
-            self.mag_ratios = FLUX_MAG_RATIOS
-
-        if len(self.mag_ratios) != self.num_inference_steps:
-            logger.debug(
-                f"Interpolating mag_ratios from length {len(self.mag_ratios)} to {self.num_inference_steps}"
+        # Strict validation: the user MUST provide ratios OR enable calibration.
+        if self.mag_ratios is None and not self.calibrate:
+            raise ValueError(
+                "`mag_ratios` must be provided for MagCache inference because these ratios are model-dependent.\n"
+                "To get them for your model:\n"
+                "1. Initialize `MagCacheConfig(calibrate=True, ...)`\n"
+                "2. Run inference on your model once.\n"
+                "3. Copy the printed ratios array and pass it to `mag_ratios` in the config.\n"
+                "For Flux models, you can import `FLUX_MAG_RATIOS` from `diffusers.hooks.mag_cache`."
             )
-            self.mag_ratios = nearest_interp(self.mag_ratios, self.num_inference_steps)
+
+        if not self.calibrate and self.mag_ratios is not None:
+            if len(self.mag_ratios) != self.num_inference_steps:
+                logger.debug(
+                    f"Interpolating mag_ratios from length {len(self.mag_ratios)} to {self.num_inference_steps}"
+                )
+                self.mag_ratios = nearest_interp(self.mag_ratios, self.num_inference_steps)
 
 
 class MagCacheState(BaseState):
     def __init__(self) -> None:
         super().__init__()
+        # Cache for the residual (output - input) from the *previous* timestep
         self.previous_residual: torch.Tensor = None
 
+        # State inputs/outputs for the current forward pass
         self.head_block_input: Union[torch.Tensor, Tuple[torch.Tensor, ...]] = None
         self.should_compute: bool = True
 
+        # MagCache accumulators
         self.accumulated_ratio: float = 1.0
         self.accumulated_err: float = 0.0
         self.accumulated_steps: int = 0
+
+        # Current step counter (timestep index)
         self.step_index: int = 0
+
+        # Calibration storage
+        self.calibration_ratios: List[float] = []
 
     def reset(self):
         self.previous_residual = None
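
Note: the two-phase workflow this validation enforces might look as follows (a minimal sketch based on the config fields above; the import path is the one named in the error message):

from diffusers.hooks.mag_cache import FLUX_MAG_RATIOS, MagCacheConfig

# Phase 1: calibration. There are no ratios yet, so calibrate=True is required;
# omitting both would raise the ValueError introduced in this hunk.
calibration_config = MagCacheConfig(calibrate=True, num_inference_steps=28)

# Phase 2: inference. Pass the ratios printed at the end of the calibration run,
# or the bundled Flux defaults, which this commit stops applying implicitly.
inference_config = MagCacheConfig(
    mag_ratios=FLUX_MAG_RATIOS,  # or an np.ndarray copied from the calibration output
    num_inference_steps=28,
)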
@@ -137,6 +162,7 @@ def reset(self):
         self.accumulated_err = 0.0
         self.accumulated_steps = 0
         self.step_index = 0
+        self.calibration_ratios = []
 
 
 class MagCacheHeadHook(ModelHook):
@@ -153,36 +179,42 @@ def initialize_hook(self, module):
         return module
 
     def new_forward(self, module: torch.nn.Module, *args, **kwargs):
+        # Capture the input hidden_states
         hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs)
 
         state: MagCacheState = self.state_manager.get_state()
         state.head_block_input = hidden_states
 
         should_compute = True
 
-        current_step = state.step_index
-        if current_step >= len(self.config.mag_ratios):
-            current_scale = 1.0
+        if self.config.calibrate:
+            # Never skip during calibration
+            should_compute = True
         else:
-            current_scale = self.config.mag_ratios[current_step]
+            # MagCache logic
+            current_step = state.step_index
+            if current_step >= len(self.config.mag_ratios):
+                current_scale = 1.0
+            else:
+                current_scale = self.config.mag_ratios[current_step]
 
-        retention_step = int(self.config.retention_ratio * self.config.num_inference_steps + 0.5)
+            retention_step = int(self.config.retention_ratio * self.config.num_inference_steps + 0.5)
 
-        if current_step >= retention_step:
-            state.accumulated_ratio *= current_scale
-            state.accumulated_steps += 1
-            state.accumulated_err += abs(1.0 - state.accumulated_ratio)
+            if current_step >= retention_step:
+                state.accumulated_ratio *= current_scale
+                state.accumulated_steps += 1
+                state.accumulated_err += abs(1.0 - state.accumulated_ratio)
 
-        if (
-            state.previous_residual is not None
-            and state.accumulated_err <= self.config.threshold
-            and state.accumulated_steps <= self.config.max_skip_steps
-        ):
-            should_compute = False
-        else:
-            state.accumulated_ratio = 1.0
-            state.accumulated_steps = 0
-            state.accumulated_err = 0.0
+            if (
+                state.previous_residual is not None
+                and state.accumulated_err <= self.config.threshold
+                and state.accumulated_steps <= self.config.max_skip_steps
+            ):
+                should_compute = False
+            else:
+                state.accumulated_ratio = 1.0
+                state.accumulated_steps = 0
+                state.accumulated_err = 0.0
 
         state.should_compute = should_compute
 
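To make the accumulator arithmetic above concrete, here is a standalone simulation of the skip decision. It is a simplified sketch, not the hook itself: the previous_residual check is folded away, pre-retention steps are treated as unconditional computes, and the ratios are invented.

import numpy as np

mag_ratios = np.linspace(1.0, 0.97, 28)  # invented; real values come from calibration
threshold, max_skip_steps, num_steps = 0.24, 5, 28
retention_step = int(0.1 * num_steps + 0.5)  # retention_ratio = 0.1

acc_ratio, acc_err, acc_steps = 1.0, 0.0, 0
for step in range(num_steps):
    if step < retention_step:
        print(step, "compute (retention)")
        continue
    acc_ratio *= mag_ratios[step]
    acc_steps += 1
    acc_err += abs(1.0 - acc_ratio)
    if acc_err <= threshold and acc_steps <= max_skip_steps:
        print(step, "skip (reuse cached residual)")
    else:
        acc_ratio, acc_steps, acc_err = 1.0, 0, 0.0  # reset after a full compute
        print(step, "compute")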
@@ -193,6 +225,7 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
             output = hidden_states
             res = state.previous_residual
 
+            # Attempt to apply the residual, handling shape mismatches (e.g., text+image vs. image-only)
             if res.shape == output.shape:
                 output = output + res
             elif (
@@ -201,6 +234,7 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
                 and output.shape[0] == res.shape[0]
                 and output.shape[2] == res.shape[2]
             ):
+                # Assuming concatenation where the image part is at the end (standard in Flux/SD3)
                 diff = output.shape[1] - res.shape[1]
                 if diff > 0:
                     output = output.clone()
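
The branch above applies a cached residual that covers only part of the sequence. A toy illustration with invented sizes, assuming the [text, image] concatenation order the comment describes (the exact slice the hook uses falls outside this hunk):

import torch

output = torch.randn(2, 512 + 64, 3072)  # text (512) + image (64) tokens
res = torch.randn(2, 64, 3072)           # cached residual for the image tokens only

diff = output.shape[1] - res.shape[1]
if diff > 0:
    output = output.clone()  # avoid mutating a tensor that may be reused elsewhere
    output[:, diff:, :] = output[:, diff:, :] + res  # image part sits at the end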
@@ -220,20 +254,18 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
                 original_encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs(
                     "encoder_hidden_states", args, kwargs
                 )
-
                 max_idx = max(
                     self._metadata.return_hidden_states_index, self._metadata.return_encoder_hidden_states_index
                 )
                 ret_list = [None] * (max_idx + 1)
-
                 ret_list[self._metadata.return_hidden_states_index] = output
                 ret_list[self._metadata.return_encoder_hidden_states_index] = original_encoder_hidden_states
-
                 return tuple(ret_list)
             else:
                 return output
 
         else:
+            # Compute original forward
             output = self.fn_ref.original_forward(*args, **kwargs)
             return output
 
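The ret_list construction above (and again in the skip path below) simply places each tensor at the index the caller of the wrapped block expects. Schematically, with invented index values:

return_hidden_states_index = 1
return_encoder_hidden_states_index = 0

output = "hidden_states_tensor"
original_encoder_hidden_states = "encoder_hidden_states_tensor"

max_idx = max(return_hidden_states_index, return_encoder_hidden_states_index)
ret_list = [None] * (max_idx + 1)
ret_list[return_hidden_states_index] = output
ret_list[return_encoder_hidden_states_index] = original_encoder_hidden_states
print(tuple(ret_list))  # ('encoder_hidden_states_tensor', 'hidden_states_tensor')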
@@ -260,21 +292,14 @@
 
         if not state.should_compute:
             hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs)
-
             if self.is_tail:
-                state.step_index += 1
-                if state.step_index >= self.config.num_inference_steps:
-                    state.step_index = 0
-                    state.accumulated_ratio = 1.0
-                    state.accumulated_steps = 0
-                    state.accumulated_err = 0.0
-                    state.previous_residual = None
+                # Still need to advance the step index even if we skip
+                self._advance_step(state)
 
             if self._metadata.return_encoder_hidden_states_index is not None:
                 encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs(
                     "encoder_hidden_states", args, kwargs
                 )
-
                 max_idx = max(
                     self._metadata.return_hidden_states_index, self._metadata.return_encoder_hidden_states_index
                 )
@@ -285,38 +310,71 @@ def new_forward(self, module: torch.nn.Module, *args, **kwargs):
 
             return hidden_states
 
-
         output = self.fn_ref.original_forward(*args, **kwargs)
 
         if self.is_tail:
+            # Calculate the residual for the next steps
             if isinstance(output, tuple):
                 out_hidden = output[self._metadata.return_hidden_states_index]
             else:
                 out_hidden = output
 
             in_hidden = state.head_block_input
-
+
+            # Determine the residual
             if out_hidden.shape == in_hidden.shape:
                 residual = out_hidden - in_hidden
             elif out_hidden.ndim == 3 and in_hidden.ndim == 3 and out_hidden.shape[2] == in_hidden.shape[2]:
                 diff = in_hidden.shape[1] - out_hidden.shape[1]
                 if diff == 0:
                     residual = out_hidden - in_hidden
                 else:
-                    residual = out_hidden - in_hidden
+                    residual = out_hidden - in_hidden  # Fallback to matching tail
+            else:
+                # Fallback for completely mismatched shapes
+                residual = out_hidden  # Invalid, but prevents a crash
 
-            state.previous_residual = residual
+            if self.config.calibrate:
+                self._perform_calibration_step(state, residual)
 
-            state.step_index += 1
-            if state.step_index >= self.config.num_inference_steps:
-                state.step_index = 0
-                state.accumulated_ratio = 1.0
-                state.accumulated_steps = 0
-                state.accumulated_err = 0.0
-                state.previous_residual = None
+            state.previous_residual = residual
+            self._advance_step(state)
 
         return output
 
+    def _perform_calibration_step(self, state: MagCacheState, current_residual: torch.Tensor):
+        if state.previous_residual is None:
+            # The first step has no previous residual to compare against.
+            # We log 1.0 as a neutral starting point.
+            ratio = 1.0
+        else:
+            # MagCache calibration formula: mean(norm(curr) / norm(prev))
+            # norm(dim=-1) gives the magnitude of each token vector
+            curr_norm = torch.linalg.norm(current_residual.float(), dim=-1)
+            prev_norm = torch.linalg.norm(state.previous_residual.float(), dim=-1)
+
+            # Avoid division by zero
+            ratio = (curr_norm / (prev_norm + 1e-8)).mean().item()
+
+        state.calibration_ratios.append(ratio)
+
+    def _advance_step(self, state: MagCacheState):
+        state.step_index += 1
+        if state.step_index >= self.config.num_inference_steps:
+            # End of the inference loop
+            if self.config.calibrate:
+                print("\n[MagCache] Calibration complete. Copy these values to MagCacheConfig(mag_ratios=...):")
+                print(f"{state.calibration_ratios}\n")
+                logger.info(f"MagCache Calibration Results: {state.calibration_ratios}")
+
+            # Reset state
+            state.step_index = 0
+            state.accumulated_ratio = 1.0
+            state.accumulated_steps = 0
+            state.accumulated_err = 0.0
+            state.previous_residual = None
+            state.calibration_ratios = []
 
 
 def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None:
     """
@@ -331,7 +389,6 @@ def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None:
     state_manager = StateManager(MagCacheState, (), {})
     remaining_blocks = []
 
-    # Identify blocks
     for name, submodule in module.named_children():
         if name not in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS or not isinstance(submodule, torch.nn.ModuleList):
             continue
@@ -342,6 +399,16 @@ def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None:
         logger.warning("MagCache: No transformer blocks found to apply hooks.")
         return
 
+    if len(remaining_blocks) == 1:
+        # Single-block case: it acts as both the Head (decision) and the Tail (residual calculation)
+        name, block = remaining_blocks[0]
+        logger.info(f"MagCache: Applying Head+Tail hooks to single block '{name}'")
+        # Apply the BlockHook (Tail) FIRST so it is the INNER wrapper
+        _apply_mag_cache_block_hook(block, state_manager, config, is_tail=True)
+        # Apply the HeadHook SECOND so it is the OUTER wrapper (controls flow)
+        _apply_mag_cache_head_hook(block, state_manager, config)
+        return
+
     head_block_name, head_block = remaining_blocks.pop(0)
     tail_block_name, tail_block = remaining_blocks.pop(-1)
 
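The FIRST/SECOND comments in the single-block branch rely on later registrations wrapping earlier ones. A language-level toy of that nesting (this sketches the assumed HookRegistry behavior, not its actual implementation):

def forward(x):
    return x + 1

def wrap(fn, label):
    def wrapped(x):
        print("enter", label)  # outer wrappers run first
        result = fn(x)
        print("exit", label)
        return result
    return wrapped

forward = wrap(forward, "BlockHook (tail)")  # registered first  -> inner wrapper
forward = wrap(forward, "HeadHook")          # registered second -> outer wrapper, controls flow
forward(0)  # enter HeadHook -> enter BlockHook (tail) -> exit BlockHook (tail) -> exit HeadHook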
@@ -371,4 +438,4 @@ def _apply_mag_cache_block_hook(
 ) -> None:
     registry = HookRegistry.check_if_exists_or_initialize(block)
     hook = MagCacheBlockHook(state_manager, is_tail, config)
-    registry.register_hook(hook, _MAG_CACHE_BLOCK_HOOK)
+    registry.register_hook(hook, _MAG_CACHE_BLOCK_HOOK)
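
Putting the pieces together, usage of this commit might look like the following. This is a hypothetical sketch: the pipeline, checkpoint name, and the choice of pipe.transformer as the hook target are illustrative; only apply_mag_cache, MagCacheConfig, and FLUX_MAG_RATIOS come from the diff.

import torch
from diffusers import FluxPipeline
from diffusers.hooks.mag_cache import FLUX_MAG_RATIOS, MagCacheConfig, apply_mag_cache

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Hook the transformer blocks; head/tail placement is chosen by apply_mag_cache.
config = MagCacheConfig(mag_ratios=FLUX_MAG_RATIOS, num_inference_steps=28)
apply_mag_cache(pipe.transformer, config)

image = pipe("a photo of a cat", num_inference_steps=28).images[0]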
