From 96864fbbba9e53e3b10cefc4650ca08190bdb1dc Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 5 Aug 2025 13:25:32 +0300 Subject: [PATCH 01/19] add alpha --- .../loaders/lora_conversion_utils.py | 120 +++++++++++++----- 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index ba96dccbe358..55640cc9c134 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1829,6 +1829,18 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): k.startswith("time_projection") and k.endswith(".weight") for k in original_state_dict ) + def get_alpha_scales(down_weight, alpha_key): + rank = down_weight.shape[0] + alpha = original_state_dict.pop(alpha_key).item() + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + return scale_down, scale_up + + for key in list(original_state_dict.keys()): if key.endswith((".diff", ".diff_b")) and "norm" in key: # NOTE: we don't support this because norm layer diff keys are just zeroed values. We can support it @@ -1848,15 +1860,25 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): for i in range(min_block, max_block + 1): # Self-attention for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): - original_key = f"blocks.{i}.self_attn.{o}.{lora_down_key}.weight" - converted_key = f"blocks.{i}.attn1.{c}.lora_A.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + has_alpha = f"blocks.{i}.self_attn.{o}.alpha" in original_state_dict + original_key_A = f"blocks.{i}.self_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn1.{c}.lora_A.weight" - original_key = f"blocks.{i}.self_attn.{o}.{lora_up_key}.weight" - converted_key = f"blocks.{i}.attn1.{c}.lora_B.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + original_key_B = f"blocks.{i}.self_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn1.{c}.lora_B.weight" + + if has_alpha: + down_weight = original_state_dict.pop(original_key_A) + up_weight = original_state_dict.pop(original_key_B) + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.self_attn.{o}.alpha") + converted_state_dict[converted_key_A] = down_weight * scale_down + converted_state_dict[converted_key_B] = up_weight * scale_up + + else: + if original_key_A in original_state_dict: + converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) + if original_key_B in original_state_dict: + converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) original_key = f"blocks.{i}.self_attn.{o}.diff_b" converted_key = f"blocks.{i}.attn1.{c}.lora_B.bias" @@ -1865,15 +1887,25 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): # Cross-attention for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): - original_key = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" - converted_key = f"blocks.{i}.attn2.{c}.lora_A.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + has_alpha = f"blocks.{i}.cross_attn.{o}.alpha" in original_state_dict + original_key_A = 
f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" + + if has_alpha: + down_weight = original_state_dict.pop(original_key_A) + up_weight = original_state_dict.pop(original_key_B) + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.cross_attn.{o}.alpha") + converted_state_dict[converted_key_A] = down_weight * scale_down + converted_state_dict[converted_key_B] = up_weight * scale_up + else: + if original_key_A in original_state_dict: + converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) - original_key = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" - converted_key = f"blocks.{i}.attn2.{c}.lora_B.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + if original_key_B in original_state_dict: + converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) original_key = f"blocks.{i}.cross_attn.{o}.diff_b" converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" @@ -1882,15 +1914,25 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): if is_i2v_lora: for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): - original_key = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" - converted_key = f"blocks.{i}.attn2.{c}.lora_A.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) - - original_key = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" - converted_key = f"blocks.{i}.attn2.{c}.lora_B.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + has_alpha = f"blocks.{i}.cross_attn.{o}.alpha" in original_state_dict + original_key_A = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" + + if has_alpha: + down_weight = original_state_dict.pop(original_key_A) + up_weight = original_state_dict.pop(original_key_B) + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.cross_attn.{o}.alpha") + converted_state_dict[converted_key_A] = down_weight * scale_down + converted_state_dict[converted_key_B] = up_weight * scale_up + else: + if original_key_A in original_state_dict: + converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) + + if original_key_B in original_state_dict: + converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) original_key = f"blocks.{i}.cross_attn.{o}.diff_b" converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" @@ -1899,15 +1941,25 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): # FFN for o, c in zip(["ffn.0", "ffn.2"], ["net.0.proj", "net.2"]): - original_key = f"blocks.{i}.{o}.{lora_down_key}.weight" - converted_key = f"blocks.{i}.ffn.{c}.lora_A.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + has_alpha = f"blocks.{i}.{o}.alpha" in original_state_dict + original_key_A = f"blocks.{i}.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.ffn.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.{o}.{lora_up_key}.weight" + converted_key_B = 
f"blocks.{i}.ffn.{c}.lora_B.weight" + + if has_alpha: + down_weight = original_state_dict.pop(original_key_A) + up_weight = original_state_dict.pop(original_key_B) + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.{o}.alpha") + converted_state_dict[converted_key_A] = down_weight * scale_down + converted_state_dict[converted_key_B] = up_weight * scale_up + else: + if original_key_A in original_state_dict: + converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) - original_key = f"blocks.{i}.{o}.{lora_up_key}.weight" - converted_key = f"blocks.{i}.ffn.{c}.lora_B.weight" - if original_key in original_state_dict: - converted_state_dict[converted_key] = original_state_dict.pop(original_key) + if original_key_B in original_state_dict: + converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) original_key = f"blocks.{i}.{o}.diff_b" converted_key = f"blocks.{i}.ffn.{c}.lora_B.bias" @@ -2072,4 +2124,4 @@ def _convert_non_diffusers_ltxv_lora_to_diffusers(state_dict, non_diffusers_pref raise ValueError("Invalid LoRA state dict for LTX-Video.") converted_state_dict = {k.removeprefix(f"{non_diffusers_prefix}."): v for k, v in state_dict.items()} converted_state_dict = {f"transformer.{k}": v for k, v in converted_state_dict.items()} - return converted_state_dict + return converted_state_dict \ No newline at end of file From 084725587b51f0b34544aef9781bda1c5b0586d7 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 5 Aug 2025 13:26:58 +0300 Subject: [PATCH 02/19] load into 2nd transformer --- src/diffusers/loaders/lora_pipeline.py | 33 ++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 45c20e505cf5..f76f87212803 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5064,7 +5064,7 @@ class WanLoraLoaderMixin(LoraBaseMixin): Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`]. 
""" - _lora_loadable_modules = ["transformer"] + _lora_loadable_modules = ["transformer", "transformer_2"] transformer_name = TRANSFORMER_NAME @classmethod @@ -5269,15 +5269,28 @@ def load_lora_weights( if not is_correct_format: raise ValueError("Invalid LoRA checkpoint.") - self.load_lora_into_transformer( - state_dict, - transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, - adapter_name=adapter_name, - metadata=metadata, - _pipeline=self, - low_cpu_mem_usage=low_cpu_mem_usage, - hotswap=hotswap, - ) + load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) + if load_into_transformer_2: + self.load_lora_into_transformer( + state_dict, + transformer=self.transformer_2, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + else: + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, + "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) @classmethod # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->WanTransformer3DModel From 5284a9cf881552644a6d3e74d110e2c71cabc252 Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:15:11 +0300 Subject: [PATCH 03/19] Update src/diffusers/loaders/lora_conversion_utils.py Co-authored-by: Sayak Paul --- src/diffusers/loaders/lora_conversion_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index 55640cc9c134..8db59a25351d 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1860,7 +1860,8 @@ def get_alpha_scales(down_weight, alpha_key): for i in range(min_block, max_block + 1): # Self-attention for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): - has_alpha = f"blocks.{i}.self_attn.{o}.alpha" in original_state_dict + alpha_key = f"blocks.{i}.self_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict original_key_A = f"blocks.{i}.self_attn.{o}.{lora_down_key}.weight" converted_key_A = f"blocks.{i}.attn1.{c}.lora_A.weight" From 0a7be77002625aa7f928c9caff7a6ed3af3fa593 Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:15:39 +0300 Subject: [PATCH 04/19] Update src/diffusers/loaders/lora_conversion_utils.py Co-authored-by: Sayak Paul --- src/diffusers/loaders/lora_conversion_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index 8db59a25351d..dad5df3c53be 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1871,7 +1871,7 @@ def get_alpha_scales(down_weight, alpha_key): if has_alpha: down_weight = original_state_dict.pop(original_key_A) up_weight = original_state_dict.pop(original_key_B) - scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.self_attn.{o}.alpha") + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) converted_state_dict[converted_key_A] = down_weight * scale_down converted_state_dict[converted_key_B] = 
up_weight * scale_up From b7e24d9128729d883f65b41d399ae01f0afb0a8d Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 7 Aug 2025 16:43:52 +0300 Subject: [PATCH 05/19] pr comments --- src/diffusers/loaders/lora_conversion_utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index dad5df3c53be..bdff5e219cae 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1888,7 +1888,8 @@ def get_alpha_scales(down_weight, alpha_key): # Cross-attention for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): - has_alpha = f"blocks.{i}.cross_attn.{o}.alpha" in original_state_dict + alpha_key = f"blocks.{i}.cross_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict original_key_A = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" @@ -1898,7 +1899,7 @@ def get_alpha_scales(down_weight, alpha_key): if has_alpha: down_weight = original_state_dict.pop(original_key_A) up_weight = original_state_dict.pop(original_key_B) - scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.cross_attn.{o}.alpha") + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) converted_state_dict[converted_key_A] = down_weight * scale_down converted_state_dict[converted_key_B] = up_weight * scale_up else: @@ -1915,7 +1916,8 @@ def get_alpha_scales(down_weight, alpha_key): if is_i2v_lora: for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): - has_alpha = f"blocks.{i}.cross_attn.{o}.alpha" in original_state_dict + alpha_key = f"blocks.{i}.cross_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict original_key_A = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" @@ -1925,7 +1927,7 @@ def get_alpha_scales(down_weight, alpha_key): if has_alpha: down_weight = original_state_dict.pop(original_key_A) up_weight = original_state_dict.pop(original_key_B) - scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.cross_attn.{o}.alpha") + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) converted_state_dict[converted_key_A] = down_weight * scale_down converted_state_dict[converted_key_B] = up_weight * scale_up else: @@ -1942,7 +1944,8 @@ def get_alpha_scales(down_weight, alpha_key): # FFN for o, c in zip(["ffn.0", "ffn.2"], ["net.0.proj", "net.2"]): - has_alpha = f"blocks.{i}.{o}.alpha" in original_state_dict + alpha_key = f"blocks.{i}.{o}.alpha" + has_alpha = alpha_key in original_state_dict original_key_A = f"blocks.{i}.{o}.{lora_down_key}.weight" converted_key_A = f"blocks.{i}.ffn.{c}.lora_A.weight" @@ -1952,7 +1955,7 @@ def get_alpha_scales(down_weight, alpha_key): if has_alpha: down_weight = original_state_dict.pop(original_key_A) up_weight = original_state_dict.pop(original_key_B) - scale_down, scale_up = get_alpha_scales(down_weight, f"blocks.{i}.{o}.alpha") + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) converted_state_dict[converted_key_A] = down_weight * scale_down converted_state_dict[converted_key_B] = up_weight * scale_up else: From bcb0924f54064f67d60377243970f322e4ab066b Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 7 Aug 2025 17:04:24 +0300 Subject: [PATCH 06/19] pr comments --- .../loaders/lora_conversion_utils.py | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff 
--git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index bdff5e219cae..3d958f8441a8 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1896,18 +1896,16 @@ def get_alpha_scales(down_weight, alpha_key): original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" - if has_alpha: + if original_key_A in original_state_dict: down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) - converted_state_dict[converted_key_A] = down_weight * scale_down - converted_state_dict[converted_key_B] = up_weight * scale_up - else: - if original_key_A in original_state_dict: - converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) - - if original_key_B in original_state_dict: - converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up original_key = f"blocks.{i}.cross_attn.{o}.diff_b" converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" @@ -1924,18 +1922,16 @@ def get_alpha_scales(down_weight, alpha_key): original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" - if has_alpha: + if original_key_A in original_state_dict: down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) - converted_state_dict[converted_key_A] = down_weight * scale_down - converted_state_dict[converted_key_B] = up_weight * scale_up - else: - if original_key_A in original_state_dict: - converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) - - if original_key_B in original_state_dict: - converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up original_key = f"blocks.{i}.cross_attn.{o}.diff_b" converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" @@ -1952,18 +1948,16 @@ def get_alpha_scales(down_weight, alpha_key): original_key_B = f"blocks.{i}.{o}.{lora_up_key}.weight" converted_key_B = f"blocks.{i}.ffn.{c}.lora_B.weight" - if has_alpha: + if original_key_A in original_state_dict: down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) - converted_state_dict[converted_key_A] = down_weight * scale_down - converted_state_dict[converted_key_B] = up_weight * scale_up - else: - if original_key_A in original_state_dict: - converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) - - if original_key_B in original_state_dict: - 
converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up original_key = f"blocks.{i}.{o}.diff_b" converted_key = f"blocks.{i}.ffn.{c}.lora_B.bias" From cabcf3dc176285a25cba2153be3b134f54a73ea7 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 7 Aug 2025 17:10:19 +0300 Subject: [PATCH 07/19] pr comments --- src/diffusers/loaders/lora_pipeline.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f76f87212803..453ee471156c 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5271,6 +5271,11 @@ def load_lora_weights( load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) if load_into_transformer_2: + if geattr(self, "transformer_2", None) is None: + raise ValueError( + "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" + "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." + ) self.load_lora_into_transformer( state_dict, transformer=self.transformer_2, From 4fdf40055d71802e39d58b0c46f4076735cc2441 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 7 Aug 2025 17:34:25 +0300 Subject: [PATCH 08/19] fix --- src/diffusers/loaders/lora_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 453ee471156c..9da9eae8431a 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5271,7 +5271,7 @@ def load_lora_weights( load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) if load_into_transformer_2: - if geattr(self, "transformer_2", None) is None: + if getattr(self, "transformer_2", None) is None: raise ValueError( "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." From 724b9a2126d92fcaef57908e79b15ffca6a256c0 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 8 Aug 2025 10:39:24 +0300 Subject: [PATCH 09/19] fix --- src/diffusers/loaders/lora_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 9da9eae8431a..d3a3cfc2f3d2 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5271,7 +5271,7 @@ def load_lora_weights( load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) if load_into_transformer_2: - if getattr(self, "transformer_2", None) is None: + if not hasattr(self, "transformer_2"): raise ValueError( "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." 
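Note (outside the diff): the alpha handling introduced in PATCH 01 and reworked in PATCH 06 folds the trainer's alpha / rank factor into the converted lora_A / lora_B matrices, balanced between the two so that neither side is multiplied by a very small number. A minimal standalone sketch of that idea; the toy shapes and alpha value are assumptions, and no real Wan checkpoint or state dict is involved:

    import torch

    def get_alpha_scales(down_weight, alpha):
        # LoRA applies `alpha / rank` in the forward pass; bake that factor into the
        # stored matrices, split between A and B via powers of two.
        rank = down_weight.shape[0]
        scale = alpha / rank
        scale_down, scale_up = scale, 1.0
        while scale_down * 2 < scale_up:
            scale_down *= 2
            scale_up /= 2
        return scale_down, scale_up

    rank, dim = 8, 64
    down_weight = torch.randn(rank, dim)  # corresponds to a ...lora_A.weight entry
    up_weight = torch.randn(dim, rank)    # corresponds to a ...lora_B.weight entry
    alpha = 4.0

    scale_down, scale_up = get_alpha_scales(down_weight, alpha)
    # The product of the two factors is exactly alpha / rank.
    assert abs(scale_down * scale_up - alpha / rank) < 1e-9
    # (up * scale_up) @ (down * scale_down) == (alpha / rank) * up @ down
    delta_w = (up_weight * scale_up) @ (down_weight * scale_down)

The conversion then stores down_weight * scale_down under the diffusers lora_A key and up_weight * scale_up under lora_B, so the converted LoRA behaves as if alpha equaled the rank, which matches the convention diffusers falls back to when no alpha is provided.
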
From b09fc4875df26a66634dd208b6df26cd8470d571 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 11 Aug 2025 06:08:37 +0000 Subject: [PATCH 10/19] Apply style fixes --- src/diffusers/loaders/lora_conversion_utils.py | 1 - src/diffusers/loaders/lora_pipeline.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index 247e985a1bc2..da65a1208b69 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -1844,7 +1844,6 @@ def get_alpha_scales(down_weight, alpha_key): scale_up /= 2 return scale_down, scale_up - for key in list(original_state_dict.keys()): if key.endswith((".diff", ".diff_b")) and "norm" in key: # NOTE: we don't support this because norm layer diff keys are just zeroed values. We can support it diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index e854cc08c469..9282b498fa61 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5289,8 +5289,9 @@ def load_lora_weights( else: self.load_lora_into_transformer( state_dict, - transformer=getattr(self, self.transformer_name) if not hasattr(self, - "transformer") else self.transformer, + transformer=getattr(self, self.transformer_name) + if not hasattr(self, "transformer") + else self.transformer, adapter_name=adapter_name, metadata=metadata, _pipeline=self, From ea451d11ede65fa3f24db38723bf8f291df13d8a Mon Sep 17 00:00:00 2001 From: linoy Date: Wed, 13 Aug 2025 11:35:28 +0000 Subject: [PATCH 11/19] fix copies --- src/diffusers/loaders/lora_pipeline.py | 37 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 9282b498fa61..1d9e5a088d2d 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5687,15 +5687,34 @@ def load_lora_weights( if not is_correct_format: raise ValueError("Invalid LoRA checkpoint.") - self.load_lora_into_transformer( - state_dict, - transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, - adapter_name=adapter_name, - metadata=metadata, - _pipeline=self, - low_cpu_mem_usage=low_cpu_mem_usage, - hotswap=hotswap, - ) + load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) + if load_into_transformer_2: + if not hasattr(self, "transformer_2"): + raise ValueError( + "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" + "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." 
+ ) + self.load_lora_into_transformer( + state_dict, + transformer=self.transformer_2, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + else: + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) + if not hasattr(self, "transformer") + else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) @classmethod # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->SkyReelsV2Transformer3DModel From 18382f4e322aeac2f09771ba7bb570b636b3b4d2 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Wed, 13 Aug 2025 16:16:51 +0300 Subject: [PATCH 12/19] fix --- src/diffusers/loaders/lora_pipeline.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 1d9e5a088d2d..735a52958995 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5065,7 +5065,7 @@ class WanLoraLoaderMixin(LoraBaseMixin): Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`]. """ - _lora_loadable_modules = ["transformer", "transformer_2"] + _lora_loadable_modules = ["transformer"] transformer_name = TRANSFORMER_NAME @classmethod @@ -5273,10 +5273,13 @@ def load_lora_weights( load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) if load_into_transformer_2: if not hasattr(self, "transformer_2"): - raise ValueError( - "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" + raise AttributeError( + f"'{type(self).__name__}' object has no attribute transformer_2" + "Note that Wan2.1 models do not have a transformer_2 component." "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." ) + if "transformer_2" not in self._lora_loadable_modules: + self._lora_loadable_modules.append("transformer_2") self.load_lora_into_transformer( state_dict, transformer=self.transformer_2, From 4c425e2d2159c01be247ca4377593d2640d9ef34 Mon Sep 17 00:00:00 2001 From: linoy Date: Wed, 13 Aug 2025 13:40:29 +0000 Subject: [PATCH 13/19] fix copies --- src/diffusers/loaders/lora_pipeline.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 735a52958995..7461143ad5f1 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5693,10 +5693,13 @@ def load_lora_weights( load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) if load_into_transformer_2: if not hasattr(self, "transformer_2"): - raise ValueError( - "Cannot load LoRA into transformer_2: transformer_2 is not available for this model" + raise AttributeError( + f"'{type(self).__name__}' object has no attribute transformer_2" + "Note that Wan2.1 models do not have a transformer_2 component." "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." 
) + if "transformer_2" not in self._lora_loadable_modules: + self._lora_loadable_modules.append("transformer_2") self.load_lora_into_transformer( state_dict, transformer=self.transformer_2, From 386cf1c974c719e7073e67c3049f2f16fe686723 Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Mon, 18 Aug 2025 21:29:10 +0300 Subject: [PATCH 14/19] Update src/diffusers/loaders/lora_pipeline.py Co-authored-by: Sayak Paul --- src/diffusers/loaders/lora_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 7461143ad5f1..0c952ef2ce00 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5695,8 +5695,8 @@ def load_lora_weights( if not hasattr(self, "transformer_2"): raise AttributeError( f"'{type(self).__name__}' object has no attribute transformer_2" - "Note that Wan2.1 models do not have a transformer_2 component." - "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." + "Note that Wan2.1 models do not have a `transformer_2` component." + "Ensure the model has a `transformer_2` component before setting load_into_transformer_2=True." ) if "transformer_2" not in self._lora_loadable_modules: self._lora_loadable_modules.append("transformer_2") From 2a5b07da12fe0e25e1662419c06a293779b40e7d Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 18 Aug 2025 21:31:53 +0300 Subject: [PATCH 15/19] revert change --- src/diffusers/loaders/lora_pipeline.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 0c952ef2ce00..124c4c943aec 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5065,7 +5065,7 @@ class WanLoraLoaderMixin(LoraBaseMixin): Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`]. """ - _lora_loadable_modules = ["transformer"] + _lora_loadable_modules = ["transformer", "transformer_2"] transformer_name = TRANSFORMER_NAME @classmethod @@ -5278,8 +5278,6 @@ def load_lora_weights( "Note that Wan2.1 models do not have a transformer_2 component." "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." ) - if "transformer_2" not in self._lora_loadable_modules: - self._lora_loadable_modules.append("transformer_2") self.load_lora_into_transformer( state_dict, transformer=self.transformer_2, From f1f1f3342d10b856a234a44fc6965a8135ca24f3 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 18 Aug 2025 21:33:51 +0300 Subject: [PATCH 16/19] revert change --- src/diffusers/loaders/lora_pipeline.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 604e91b43b85..66a97998a4b7 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5696,8 +5696,6 @@ def load_lora_weights( "Note that Wan2.1 models do not have a `transformer_2` component." "Ensure the model has a `transformer_2` component before setting load_into_transformer_2=True." 
) - if "transformer_2" not in self._lora_loadable_modules: - self._lora_loadable_modules.append("transformer_2") self.load_lora_into_transformer( state_dict, transformer=self.transformer_2, From d83a5924bb4271b911b869aba8a3b1e1dfe3e140 Mon Sep 17 00:00:00 2001 From: linoy Date: Mon, 18 Aug 2025 19:10:04 +0000 Subject: [PATCH 17/19] fix copies --- src/diffusers/loaders/lora_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 66a97998a4b7..572ace472f1d 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -5693,8 +5693,8 @@ def load_lora_weights( if not hasattr(self, "transformer_2"): raise AttributeError( f"'{type(self).__name__}' object has no attribute transformer_2" - "Note that Wan2.1 models do not have a `transformer_2` component." - "Ensure the model has a `transformer_2` component before setting load_into_transformer_2=True." + "Note that Wan2.1 models do not have a transformer_2 component." + "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." ) self.load_lora_into_transformer( state_dict, From ce5be55a9769c19cbc6b5aa43ec85bb6f279bf51 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 19 Aug 2025 06:55:55 +0530 Subject: [PATCH 18/19] up --- docs/source/en/api/pipelines/wan.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/en/api/pipelines/wan.md b/docs/source/en/api/pipelines/wan.md index e46aa55ad82a..b9c5990f2435 100644 --- a/docs/source/en/api/pipelines/wan.md +++ b/docs/source/en/api/pipelines/wan.md @@ -333,6 +333,8 @@ The general rule of thumb to keep in mind when preparing inputs for the VACE pip - Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involed. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more. +- Wan 2.2 has two denoisers. By default, LoRAs are only loaded into the first denoiser. One can set `load_into_transformer_2=True` to load LoRAs into the second denoiser. Refer to [this](https://github.com/huggingface/diffusers/pull/12074#issue-3292620048) and [this](https://github.com/huggingface/diffusers/pull/12074#issuecomment-3155896144) examples to learn more. + ## WanPipeline [[autodoc]] WanPipeline From 0559eac932469d155835c7805ad37f012b13368e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 19 Aug 2025 07:48:43 +0530 Subject: [PATCH 19/19] fix --- src/diffusers/loaders/lora_base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/lora_base.py b/src/diffusers/loaders/lora_base.py index 3089086d5478..d18c82df4f62 100644 --- a/src/diffusers/loaders/lora_base.py +++ b/src/diffusers/loaders/lora_base.py @@ -754,7 +754,11 @@ def set_adapters( # Decompose weights into weights for denoiser and text encoders. _component_adapter_weights = {} for component in self._lora_loadable_modules: - model = getattr(self, component) + model = getattr(self, component, None) + # To guard for cases like Wan. In Wan2.1 and WanVace, we have a single denoiser. + # Whereas in Wan 2.2, we have two denoisers. + if model is None: + continue for adapter_name, weights in zip(adapter_names, adapter_weights): if isinstance(weights, dict):
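
Note (outside the diff): taken together, the series lets a Wan 2.2 pipeline receive LoRA weights in either denoiser via the new load_into_transformer_2 flag. A minimal usage sketch; the checkpoint id, LoRA filename and adapter names below are illustrative placeholders rather than anything this PR ships:

    import torch
    from diffusers import WanPipeline

    pipe = WanPipeline.from_pretrained(
        "Wan-AI/Wan2.2-T2V-A14B-Diffusers",  # assumed Wan 2.2 checkpoint id with two denoisers
        torch_dtype=torch.bfloat16,
    )

    # Default behaviour: the LoRA goes into the first denoiser, pipe.transformer.
    pipe.load_lora_weights("my_wan_lora.safetensors", adapter_name="lora_t1")

    # Added by this series: target the second denoiser, pipe.transformer_2.
    pipe.load_lora_weights(
        "my_wan_lora.safetensors",
        adapter_name="lora_t2",
        load_into_transformer_2=True,
    )

On a pipeline that has no transformer_2 component, passing load_into_transformer_2=True raises the error added above, and the set_adapters guard from PATCH 19 skips entries of _lora_loadable_modules that the pipeline does not expose.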