Skip to content

Commit 286e5aa

Browse files
committed
valid suffix decoding args
1 parent 282687a commit 286e5aa

File tree

1 file changed

+2 -0 lines changed

1 file changed

+2 -0 lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1952,6 +1952,8 @@ def _build_attn_state(self, num_reqs, num_scheduled_tokens,
1952 1952    if self.drafter and (self.drafter.name == SpecDcodeType.EAGLE
1953 1953                         or self.drafter.name == SpecDcodeType.EAGLE3):
1954 1954        attn_state = AscendAttentionState.ChunkedPrefill
     1955 +  elif self.drafter and self.drafter.name in (SpecDcodeType.NGRAM, SpecDcodeType.SUFFIX):
     1956 +      attn_state = AscendAttentionState.DecodeOnly
1955 1957    else:
1956 1958        attn_state = AscendAttentionState.SpecDecoding
1957 1959    # splitfuse

0 commit comments

Comments
 (0)