Enables compilation for flex attention forward

LoserCheems · LoserCheems · commit 43adc40842d4 · 2025-08-07T22:43:56.000+08:00
Switches the `_compile` flag from `False` to `True` in `flash_dmattn/flash_dmattn_flex.py`, enabling compilation to improve performance through kernel optimization during the flex attention forward computation.
diff --git a/flash_dmattn/flash_dmattn_flex.py b/flash_dmattn/flash_dmattn_flex.py
@@ -37,7 +37,7 @@ def causal_mask_mod(batch_idx, head_idx, q_idx, kv_idx):
         Q_LEN=query.shape[2],
         KV_LEN=key.shape[2],
         device=query.device,
-        _compile=False,
+        _compile=True,
     )
 
     kernel_options = {

Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ def causal_mask_mod(batch_idx, head_idx, q_idx, kv_idx):`
`37`	`37`	`Q_LEN=query.shape[2],`
`38`	`38`	`KV_LEN=key.shape[2],`
`39`	`39`	`device=query.device,`
`40`		`- _compile=False,`
	`40`	`+ _compile=True,`
`41`	`41`	`)`
`42`	`42`
`43`	`43`	`kernel_options = {`