2 files changed: +2 -5 lines changed
server/text_generation_server/layers/attention (2 files changed: +2 -5 lines changed)
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,6 @@ def attention(
235 235   paged_kv_cache = (kv_cache .key , kv_cache .value ),
236 236   logits_soft_cap = softcap ,
237 237   sm_scale = softmax_scale ,
238     - window_left = window_size_left ,
239 238   k_scale = kv_scales .key_scale_cpu if can_scale else 1.0 ,
240 239   v_scale = kv_scales .value_scale_cpu if can_scale else 1.0 ,
241 240   )
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def use_prefill_with_paged_kv_state(
84 84
85 85   token = prefill_with_paged_kv_state .set (state )
86 86   try :
87    - state .begin_forward (
   87 + state .plan (
88 88   qo_indptr = cu_seqlens ,
89 89   paged_kv_indptr = indptr ,
90 90   paged_kv_indices = block_tables ,
@@ -99,7 +99,6 @@ def use_prefill_with_paged_kv_state(
99 99     )
100 100   yield
101 101   finally :
102     - state .end_forward ()
103 102   if token is not None :
104 103   prefill_with_paged_kv_state .reset (token )
105 104
@@ -200,7 +199,7 @@ def use_decode_state(
200 199   token = decode_state .set (state )
201 200
202 201   try :
203     - state .begin_forward (
    202 + state .plan (
204 203   indptr = indptr ,
205 204   indices = block_tables ,
206 205   last_page_len = last_page_len ,
@@ -214,6 +213,5 @@ def use_decode_state(
214 213   )
215 214   yield
216 215   finally :
217     - state .end_forward ()
218 216   if token is not None :
219 217   decode_state .reset (token )
You can’t perform that action at this time.
0 commit comments