
Commit 4496d68

zRzRzRzRzRzRzR authored and Isotr0py committed
for glm-4.1V update (vllm-project#22000)
Cherry-pick: vllm-project@25373b6

Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
1 parent 236bb59 commit 4496d68

6 files changed: +24 −15 lines

docs/models/supported_models.md
Lines changed: 2 additions & 1 deletion

@@ -523,7 +523,8 @@ Specified using `--task generate`.
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
 | `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
 | `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎\* |
 | `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
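The table rows above translate directly into the offline API. A minimal sketch of loading the GLM-4.1V checkpoint listed there (the prompt and sampling values are illustrative assumptions, not part of this commit):

```python
from vllm import LLM, SamplingParams

# Load the dense GLM-4.1V checkpoint from the table above.
# (zai-org/GLM-4.5V-Air is also registered by this commit, but the test
# registry still marks it as not available online.)
llm = LLM(model="THUDM/GLM-4.1V-9B-Thinking")

params = SamplingParams(temperature=0.0, max_tokens=64)  # illustrative values
outputs = llm.generate(["Describe the image support of GLM-4.1V-Thinking."],
                       params)
print(outputs[0].outputs[0].text)
```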

tests/models/registry.py
Lines changed: 6 additions & 5 deletions

@@ -322,9 +322,10 @@ def check_available_online(
     "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking", min_transformers_version="4.53"),  # noqa: E501
-    "Glm4MoeForCausalLM": _HfExamplesInfo("THUDM/GLM-4.5",
-                                          min_transformers_version="4.54",
+    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"),  # noqa: E501
+    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
+                                          min_transformers_version="4.54"),  # noqa: E501
+    "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),  # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
                                       extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
@@ -431,8 +432,8 @@ def check_available_online(
                                          is_available_online=False,
                                          speculative_model="openbmb/MiniCPM-2B-sft-bf16",
                                          tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
-    "Glm4MoeMTPModel": _HfExamplesInfo("THUDM/GLM-4.5",
-                                       speculative_model="THUDM/GLM-4.5",
+    "Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5",
+                                       speculative_model="zai-org/GLM-4.5",
                                        min_transformers_version="4.54",
                                        is_available_online=False),
     "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",

tests/tool_use/test_glm4_moe_tool_parser.py
Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 
 pytest.skip("skip glm4_moe parser test", allow_module_level=True)
 # Use a common model that is likely to be available
-MODEL = "THUDM/GLM-4.5"
+MODEL = "zai-org/GLM-4.5"
 
 
 @pytest.fixture(scope="module")
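For context, the `MODEL` constant feeds a module-scoped fixture further down the file. A minimal sketch of that pattern; the fixture name and body are assumptions, only `MODEL` and the module-level skip come from the diff:

```python
import pytest

# Module-level skip: executing this at import time prevents the whole
# test module from being collected, exactly as in the file above.
pytest.skip("skip glm4_moe parser test", allow_module_level=True)

MODEL = "zai-org/GLM-4.5"  # model id renamed by this commit


@pytest.fixture(scope="module")
def model_name() -> str:
    # Hypothetical fixture: the real file builds a tool parser around MODEL.
    return MODEL
```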

vllm/model_executor/layers/rotary_embedding.py
Lines changed: 1 addition & 1 deletion

@@ -1268,7 +1268,7 @@ def get_input_positions_tensor(
                 audio_feature_lengths=audio_feature_lengths,
                 use_audio_in_video=use_audio_in_video,
             )
-        elif "glm4v" in hf_config.model_type:
+        elif hf_config.model_type in ["glm4v", "glm4v_moe"]:
             return cls._glm4v_get_input_positions_tensor(
                 input_tokens=input_tokens,
                 hf_config=hf_config,
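Semantically, the old substring test `"glm4v" in hf_config.model_type` would already have matched `"glm4v_moe"`, but it would also match any future model type that merely contains `"glm4v"`; the new membership test is an explicit allow-list. A standalone illustration:

```python
def matches_old(model_type: str) -> bool:
    # Pre-commit behaviour: substring containment.
    return "glm4v" in model_type


def matches_new(model_type: str) -> bool:
    # Post-commit behaviour: exact allow-list.
    return model_type in ["glm4v", "glm4v_moe"]


for t in ("glm4v", "glm4v_moe", "glm4v_experimental"):
    print(f"{t}: old={matches_old(t)} new={matches_new(t)}")
# glm4v:              old=True  new=True
# glm4v_moe:          old=True  new=True
# glm4v_experimental: old=True  new=False  <- only the allow-list rejects this
```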

vllm/model_executor/models/glm4_1v.py
Lines changed: 13 additions & 7 deletions

@@ -36,8 +36,7 @@
 import torch.nn.functional as F
 from einops import rearrange
 from transformers import BatchFeature
-from transformers.models.glm4v.configuration_glm4v import (Glm4vConfig,
-                                                           Glm4vVisionConfig)
+from transformers.models.glm4v.configuration_glm4v import Glm4vVisionConfig
 from transformers.models.glm4v.image_processing_glm4v import (
     Glm4vImageProcessor, smart_resize)
 from transformers.models.glm4v.video_processing_glm4v import (
@@ -815,7 +814,7 @@ def load_weights(self, weights: Iterable[tuple[str,
 class Glm4vProcessingInfo(BaseProcessingInfo):
 
     def get_hf_config(self):
-        return self.ctx.get_hf_config(Glm4vConfig)
+        return self.ctx.get_hf_config()
 
     def get_tokenizer(self):
         return self.ctx.tokenizer
@@ -1259,7 +1258,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
-        config: Glm4vConfig = vllm_config.model_config.hf_config
+        config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
 
@@ -1273,11 +1272,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             prefix=maybe_prefix(prefix, "visual"),
         )
 
+        if config.model_type == "glm4v":
+            architectures = ["Glm4ForCausalLM"]
+        elif config.model_type == "glm4v_moe":
+            architectures = ["Glm4MoeForCausalLM"]
+        else:
+            architectures = None
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
-            prefix=maybe_prefix(prefix, ""),
-            architectures=["Glm4ForCausalLM"],
-        )
+            hf_config=config.text_config,
+            prefix=maybe_prefix(prefix, "language_model"),
+            architectures=architectures)
 
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
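The core of this change is that `Glm4vForConditionalGeneration` now selects its text backbone from `config.model_type` instead of hard-coding `Glm4ForCausalLM`, which is what lets one wrapper serve both GLM-4.1V (dense) and GLM-4.5V (MoE). A self-contained sketch of that dispatch; only the two architecture names come from the diff:

```python
# model_type -> language-model architecture override, as in __init__ above.
_TEXT_BACKBONES = {
    "glm4v": ["Glm4ForCausalLM"],         # dense GLM-4.1V
    "glm4v_moe": ["Glm4MoeForCausalLM"],  # MoE GLM-4.5V
}


def pick_architectures(model_type: str) -> list[str] | None:
    # None falls through to whatever the HF config itself declares,
    # mirroring the `else: architectures = None` branch above.
    return _TEXT_BACKBONES.get(model_type)


assert pick_architectures("glm4v_moe") == ["Glm4MoeForCausalLM"]
assert pick_architectures("something_else") is None
```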

vllm/model_executor/models/registry.py
Lines changed: 1 addition & 0 deletions

@@ -186,6 +186,7 @@
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
+    "Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"),  # noqa: E501
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
