
Commit f909481

Merge branch 'main' into integrations/wan2.2-s2v
2 parents 065d982 + a9e4883

31 files changed: +1445 -190 lines changed

docs/source/en/api/models/wan_animate_transformer_3d.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import WanAnimateTransformer3DModel
 
-transformer = WanAnimateTransformer3DModel.from_pretrained("Wan-AI/Wan2.2-Animate-14B-720P-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
+transformer = WanAnimateTransformer3DModel.from_pretrained("Wan-AI/Wan2.2-Animate-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
 ```
 
 ## WanAnimateTransformer3DModel
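For context, a minimal sketch of how the renamed checkpoint would typically be combined with the pipeline. This snippet is illustrative and not part of the diff; it assumes the `WanAnimatePipeline` usage documented in `docs/source/en/api/pipelines/wan.md` below.

```python
import torch
from diffusers import WanAnimatePipeline, WanAnimateTransformer3DModel

# Load the transformer from the renamed repository, then hand it to the pipeline.
transformer = WanAnimateTransformer3DModel.from_pretrained(
    "Wan-AI/Wan2.2-Animate-14B-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16
)
pipe = WanAnimatePipeline.from_pretrained(
    "Wan-AI/Wan2.2-Animate-14B-Diffusers", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.to("cuda")
```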

docs/source/en/api/pipelines/wan.md

Lines changed: 18 additions & 30 deletions
@@ -283,7 +283,7 @@ For replacement mode, you additionally need:
 - **Mask video**: A mask indicating where to generate content (white) vs. preserve original (black)
 
 > [!NOTE]
-> The preprocessing tools are available in the original Wan-Animate repository. Integration of these preprocessing steps into Diffusers is planned for a future release.
+> Raw videos should not be used for inputs such as `pose_video`, which the pipeline expects to be preprocessed to extract the proper information. Preprocessing scripts to prepare these inputs are available in the [original Wan-Animate repository](https://github.com/Wan-Video/Wan2.2?tab=readme-ov-file#1-preprocessing). Integration of these preprocessing steps into Diffusers is planned for a future release.
 
 The example below demonstrates how to use the Wan-Animate pipeline:
 
@@ -295,13 +295,10 @@ import numpy as np
 import torch
 from diffusers import AutoencoderKLWan, WanAnimatePipeline
 from diffusers.utils import export_to_video, load_image, load_video
-from transformers import CLIPVisionModel
 
 model_id = "Wan-AI/Wan2.2-Animate-14B-Diffusers"
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanAnimatePipeline.from_pretrained(
-    model_id, vae=vae, torch_dtype=torch.bfloat16
-)
+pipe = WanAnimatePipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 
 # Load character image and preprocessed videos
@@ -332,11 +329,11 @@ output = pipe(
     negative_prompt=negative_prompt,
     height=height,
     width=width,
-    num_frames=81,
-    guidance_scale=5.0,
-    mode="animation", # Animation mode (default)
+    segment_frame_length=77,
+    guidance_scale=1.0,
+    mode="animate", # Animation mode (default)
 ).frames[0]
-export_to_video(output, "animated_character.mp4", fps=16)
+export_to_video(output, "animated_character.mp4", fps=30)
 ```
 
 </hfoption>
@@ -347,14 +344,10 @@ import numpy as np
 import torch
 from diffusers import AutoencoderKLWan, WanAnimatePipeline
 from diffusers.utils import export_to_video, load_image, load_video
-from transformers import CLIPVisionModel
 
 model_id = "Wan-AI/Wan2.2-Animate-14B-Diffusers"
-image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanAnimatePipeline.from_pretrained(
-    model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
-)
+pipe = WanAnimatePipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 
 # Load all required inputs for replacement mode
@@ -389,11 +382,11 @@ output = pipe(
     negative_prompt=negative_prompt,
     height=height,
     width=width,
-    num_frames=81,
-    guidance_scale=5.0,
-    mode="replacement", # Replacement mode
+    segment_frame_lengths=77,
+    guidance_scale=1.0,
+    mode="replace", # Replacement mode
 ).frames[0]
-export_to_video(output, "character_replaced.mp4", fps=16)
+export_to_video(output, "character_replaced.mp4", fps=30)
 ```
 
 </hfoption>
@@ -404,14 +397,10 @@ import numpy as np
 import torch
 from diffusers import AutoencoderKLWan, WanAnimatePipeline
 from diffusers.utils import export_to_video, load_image, load_video
-from transformers import CLIPVisionModel
 
 model_id = "Wan-AI/Wan2.2-Animate-14B-Diffusers"
-image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanAnimatePipeline.from_pretrained(
-    model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
-)
+pipe = WanAnimatePipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 
 image = load_image("path/to/character.jpg")
@@ -445,25 +434,24 @@ output = pipe(
     negative_prompt=negative_prompt,
     height=height,
     width=width,
-    num_frames=81,
+    segment_frame_length=77,
     num_inference_steps=50,
     guidance_scale=5.0,
-    num_frames_for_temporal_guidance=5, # Use 5 frames for temporal guidance (1 or 5 recommended)
+    prev_segment_conditioning_frames=5, # Use 5 frames for temporal guidance (1 or 5 recommended)
     callback_on_step_end=callback_fn,
     callback_on_step_end_tensor_inputs=["latents"],
 ).frames[0]
-export_to_video(output, "animated_advanced.mp4", fps=16)
+export_to_video(output, "animated_advanced.mp4", fps=30)
 ```
 
 </hfoption>
 </hfoptions>
 
 #### Key Parameters
 
-- **mode**: Choose between `"animation"` (default) or `"replacement"`
-- **num_frames_for_temporal_guidance**: Number of frames for temporal guidance (1 or 5 recommended). Using 5 provides better temporal consistency but requires more memory
-- **guidance_scale**: Controls how closely the output follows the text prompt. Higher values (5-7) produce results more aligned with the prompt
-- **num_frames**: Total number of frames to generate. Should be divisible by `vae_scale_factor_temporal` (default: 4)
+- **mode**: Choose between `"animate"` (default) or `"replace"`
+- **prev_segment_conditioning_frames**: Number of frames for temporal guidance (1 or 5 recommended). Using 5 provides better temporal consistency but requires more memory
+- **guidance_scale**: Controls how closely the output follows the text prompt. Higher values (5-7) produce results more aligned with the prompt. For Wan-Animate, CFG is disabled by default (`guidance_scale=1.0`) but can be enabled to support negative prompts and finer control over facial expressions. (Note that CFG will only target the text prompt and face conditioning.)
 
 
 ### Wan-S2V: Audio-Driven Cinematic Video Generation
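To make the new `guidance_scale` behaviour concrete, here is an illustrative sketch (not part of the diff) of enabling CFG so that `negative_prompt` takes effect. The conditioning-input argument names (`image`, `pose_video`, `face_video`) and the resolution values are assumptions based on the surrounding examples and may differ from the final pipeline signature.

```python
import torch
from diffusers import AutoencoderKLWan, WanAnimatePipeline
from diffusers.utils import export_to_video, load_image, load_video

model_id = "Wan-AI/Wan2.2-Animate-14B-Diffusers"
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanAnimatePipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = load_image("path/to/character.jpg")
pose_video = load_video("path/to/pose_video.mp4")  # preprocessed, see the NOTE above
face_video = load_video("path/to/face_video.mp4")  # assumed input name

output = pipe(
    image=image,
    pose_video=pose_video,
    face_video=face_video,
    prompt="a person dancing in a studio",
    negative_prompt="blurry, distorted",  # only applied when CFG is enabled
    height=720,
    width=1280,
    segment_frame_length=77,
    guidance_scale=2.0,  # > 1.0 enables CFG on the text prompt and face conditioning
    mode="animate",
).frames[0]
export_to_video(output, "animated_character_cfg.mp4", fps=30)
```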

src/diffusers/schedulers/scheduling_consistency_models.py

Lines changed: 41 additions & 3 deletions
@@ -121,7 +121,7 @@ def set_begin_index(self, begin_index: int = 0):
         Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
 
         Args:
-            begin_index (`int`):
+            begin_index (`int`, defaults to `0`):
                 The begin index for the scheduler.
         """
         self._begin_index = begin_index
@@ -287,7 +287,23 @@ def get_scalings_for_boundary_condition(self, sigma):
         return c_skip, c_out
 
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
-    def index_for_timestep(self, timestep, schedule_timesteps=None):
+    def index_for_timestep(
+        self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
+    ) -> int:
+        """
+        Find the index of a given timestep in the timestep schedule.
+
+        Args:
+            timestep (`float` or `torch.Tensor`):
+                The timestep value to find in the schedule.
+            schedule_timesteps (`torch.Tensor`, *optional*):
+                The timestep schedule to search in. If `None`, uses `self.timesteps`.
+
+        Returns:
+            `int`:
+                The index of the timestep in the schedule. For the very first step, returns the second index if
+                multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
+        """
         if schedule_timesteps is None:
             schedule_timesteps = self.timesteps
 
@@ -302,7 +318,14 @@ def index_for_timestep(self, timestep, schedule_timesteps=None):
         return indices[pos].item()
 
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
-    def _init_step_index(self, timestep):
+    def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
+        """
+        Initialize the step index for the scheduler based on the given timestep.
+
+        Args:
+            timestep (`float` or `torch.Tensor`):
+                The current timestep to initialize the step index from.
+        """
         if self.begin_index is None:
             if isinstance(timestep, torch.Tensor):
                 timestep = timestep.to(self.timesteps.device)
@@ -410,6 +433,21 @@ def add_noise(
         noise: torch.Tensor,
         timesteps: torch.Tensor,
     ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise schedule at the specified timesteps.
+
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise tensor to add to the original samples.
+            timesteps (`torch.Tensor`):
+                The timesteps at which to add noise, determining the noise level from the schedule.
+
+        Returns:
+            `torch.Tensor`:
+                The noisy samples with added noise scaled according to the timestep schedule.
+        """
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
         if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
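The new docstrings describe existing behaviour rather than changing it. As a rough, standalone illustration (not diffusers code) of what `index_for_timestep` and `add_noise` do together: look up the sigma that corresponds to each timestep, then add noise scaled by that sigma.

```python
import torch

# Toy sigma/timestep schedules (five entries each).
sigmas = torch.tensor([80.0, 40.0, 20.0, 10.0, 0.0])
schedule_timesteps = torch.tensor([1.0, 0.75, 0.5, 0.25, 0.0])

# Timesteps at which we want to noise two samples.
timesteps = torch.tensor([0.5, 0.25])

# index_for_timestep: locate each timestep in the schedule.
step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]

original_samples = torch.randn(2, 3)        # stand-in for latents
noise = torch.randn_like(original_samples)
sigma = sigmas[step_indices].view(-1, 1)    # one sigma per sample, broadcastable

noisy_samples = original_samples + noise * sigma  # the core of add_noise
print(noisy_samples.shape)  # torch.Size([2, 3])
```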

src/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py

Lines changed: 29 additions & 1 deletion
@@ -137,7 +137,7 @@ def set_begin_index(self, begin_index: int = 0):
         Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
 
         Args:
-            begin_index (`int`):
+            begin_index (`int`, defaults to `0`):
                 The begin index for the scheduler.
         """
         self._begin_index = begin_index
@@ -266,6 +266,19 @@ def _compute_exponential_sigmas(self, ramp, sigma_min=None, sigma_max=None) -> t
 
     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
     def _sigma_to_t(self, sigma, log_sigmas):
+        """
+        Convert sigma values to corresponding timestep values through interpolation.
+
+        Args:
+            sigma (`np.ndarray`):
+                The sigma value(s) to convert to timestep(s).
+            log_sigmas (`np.ndarray`):
+                The logarithm of the sigma schedule used for interpolation.
+
+        Returns:
+            `np.ndarray`:
+                The interpolated timestep value(s) corresponding to the input sigma(s).
+        """
         # get log sigma
         log_sigma = np.log(np.maximum(sigma, 1e-10))
 
@@ -537,6 +550,21 @@ def add_noise(
         noise: torch.Tensor,
         timesteps: torch.Tensor,
     ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise schedule at the specified timesteps.
+
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise tensor to add to the original samples.
+            timesteps (`torch.Tensor`):
+                The timesteps at which to add noise, determining the noise level from the schedule.
+
+        Returns:
+            `torch.Tensor`:
+                The noisy samples with added noise scaled according to the timestep schedule.
+        """
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
         if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
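For readers unfamiliar with the conversion that `_sigma_to_t` performs, here is a simplified standalone sketch (not the library implementation) of log-space interpolation between neighbouring schedule entries.

```python
import numpy as np

# Toy sigma schedule, descending as the timestep index grows.
log_sigmas = np.log(np.array([80.0, 20.0, 5.0, 1.25, 0.3]))
timestep_ids = np.arange(len(log_sigmas))  # 0, 1, 2, 3, 4

def sigma_to_t(sigma: float) -> float:
    log_sigma = np.log(max(sigma, 1e-10))
    # Index of the schedule entry just above log_sigma (bracketing pair idx, idx + 1).
    idx = np.clip(np.searchsorted(-log_sigmas, -log_sigma) - 1, 0, len(log_sigmas) - 2)
    low, high = log_sigmas[idx], log_sigmas[idx + 1]
    w = np.clip((low - log_sigma) / (low - high), 0.0, 1.0)  # interpolation weight
    return float((1 - w) * timestep_ids[idx] + w * timestep_ids[idx + 1])

print(sigma_to_t(10.0))  # ~1.5, between schedule entries 1 (sigma=20) and 2 (sigma=5)
```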

src/diffusers/schedulers/scheduling_ddim.py

Lines changed: 3 additions & 2 deletions
@@ -99,10 +99,11 @@ def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
 
     Args:
         betas (`torch.Tensor`):
-            the betas that the scheduler is being initialized with.
+            The betas that the scheduler is being initialized with.
 
     Returns:
-        `torch.Tensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`:
+            Rescaled betas with zero terminal SNR.
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas
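The wording fix above only touches the docstring. For reference, a sketch of the rescaling that `rescale_zero_terminal_snr` describes (Algorithm 1 of the zero-terminal-SNR paper), mirrored here rather than quoted from the file.

```python
import torch

def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
    alphas = 1.0 - betas
    alphas_cumprod = torch.cumprod(alphas, dim=0)
    alphas_bar_sqrt = alphas_cumprod.sqrt()

    # Shift so the last cumulative alpha becomes exactly zero (zero terminal SNR)...
    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
    alphas_bar_sqrt -= alphas_bar_sqrt_T
    # ...and scale so the first cumulative alpha keeps its original value.
    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)

    # Convert the rescaled cumulative product back to per-step betas.
    alphas_bar = alphas_bar_sqrt**2
    alphas = alphas_bar[1:] / alphas_bar[:-1]
    alphas = torch.cat([alphas_bar[0:1], alphas])
    return 1.0 - alphas

betas = torch.linspace(0.0001, 0.02, 1000)  # toy DDPM-style schedule
print(rescale_zero_terminal_snr(betas)[-1])  # -> tensor(1.), the terminal step carries zero signal
```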

src/diffusers/schedulers/scheduling_ddim_inverse.py

Lines changed: 3 additions & 2 deletions
@@ -98,10 +98,11 @@ def rescale_zero_terminal_snr(betas):
 
     Args:
         betas (`torch.Tensor`):
-            the betas that the scheduler is being initialized with.
+            The betas that the scheduler is being initialized with.
 
     Returns:
-        `torch.Tensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`:
+            Rescaled betas with zero terminal SNR.
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas

src/diffusers/schedulers/scheduling_ddim_parallel.py

Lines changed: 3 additions & 2 deletions
@@ -100,10 +100,11 @@ def rescale_zero_terminal_snr(betas):
 
     Args:
         betas (`torch.Tensor`):
-            the betas that the scheduler is being initialized with.
+            The betas that the scheduler is being initialized with.
 
     Returns:
-        `torch.Tensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`:
+            Rescaled betas with zero terminal SNR.
     """
     # Convert betas to alphas_bar_sqrt
    alphas = 1.0 - betas

src/diffusers/schedulers/scheduling_ddpm.py

Lines changed: 3 additions & 2 deletions
@@ -97,10 +97,11 @@ def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
 
     Args:
         betas (`torch.Tensor`):
-            the betas that the scheduler is being initialized with.
+            The betas that the scheduler is being initialized with.
 
     Returns:
-        `torch.Tensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`:
+            Rescaled betas with zero terminal SNR.
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas

src/diffusers/schedulers/scheduling_ddpm_parallel.py

Lines changed: 3 additions & 2 deletions
@@ -99,10 +99,11 @@ def rescale_zero_terminal_snr(betas):
 
     Args:
         betas (`torch.Tensor`):
-            the betas that the scheduler is being initialized with.
+            The betas that the scheduler is being initialized with.
 
     Returns:
-        `torch.Tensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`:
+            Rescaled betas with zero terminal SNR.
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas
