File tree: 1 file changed, +2 −2 lines changed
```diff
@@ -45,7 +45,7 @@
 from .configuration_doge import DogeConfig

 try:
-    from flash_sparse_attn.integrations.flash_sparse_attention import flash_dynamic_mask_attention_forward
+    from flash_sparse_attn.integrations.flash_sparse_attention import flash_sparse_attention_forward
 except ImportError:
     print("Please install flash_sparse_attn to use this model: pip install flash-sparse-attn")
```
```diff
@@ -220,7 +220,7 @@ def forward(
         # original formula is exp(A * softplus(delta V)), but for numerical stability, it is changed to A * softplus(delta V)
         attn_bias = (self.A * F.softplus(dt_states)).transpose(-1, -2).unsqueeze(-2).to(hidden_states.dtype)

-        attention_interface: Callable = flash_dynamic_mask_attention_forward
+        attention_interface: Callable = flash_sparse_attention_forward

         attn_output, attn_weights = attention_interface(
             self,
```
You can’t perform that action at this time.
0 commit comments