refactor: update function calls to use unified 'lpc' and 'scan' operations

yoyolicoris · yoyolicoris · commit a4fd5359d8ee · 2025-05-03T23:26:55.000+01:00
diff --git a/torchlpc/core.py b/torchlpc/core.py
@@ -162,7 +162,7 @@ def forward(x: torch.Tensor, A: torch.Tensor, zi: torch.Tensor) -> torch.Tensor:
         if x.is_cuda:
             y = lpc_cuda(x.detach(), A.detach(), zi.detach())
         elif EXTENSION_LOADED:
-            y = torch.ops.torchlpc.lpc_cpu(x, A, zi)
+            y = torch.ops.torchlpc.lpc(x, A, zi)
         else:
             warnings.warn(
                 "Cannot find custom extension. Falling back to Numba implementation which will be deprecated in v1.0."
diff --git a/torchlpc/recurrence.py b/torchlpc/recurrence.py
@@ -19,22 +19,25 @@ def forward(
         n_dims, n_steps = decay.shape
         if decay.is_cuda:
             if n_dims * WARPSIZE < n_steps:
-                out = torch.empty_like(impulse)
-                compute_linear_recurrence(
-                    cuda.as_cuda_array(decay.detach()),
-                    cuda.as_cuda_array(impulse.detach()),
-                    cuda.as_cuda_array(initial_state.detach()),
-                    cuda.as_cuda_array(out),
-                    n_dims,
-                    n_steps,
-                )
+                if EXTENSION_LOADED:
+                    out = torch.ops.torchlpc.scan(impulse, decay, initial_state)
+                else:
+                    out = torch.empty_like(impulse)
+                    compute_linear_recurrence(
+                        cuda.as_cuda_array(decay.detach()),
+                        cuda.as_cuda_array(impulse.detach()),
+                        cuda.as_cuda_array(initial_state.detach()),
+                        cuda.as_cuda_array(out),
+                        n_dims,
+                        n_steps,
+                    )
             else:
                 out = lpc_cuda(impulse, -decay.unsqueeze(2), initial_state.unsqueeze(1))
         else:
             num_threads = torch.get_num_threads()
             # This is just a rough estimation of the computational cost
             if EXTENSION_LOADED and min(n_dims, num_threads) < num_threads / 3:
-                out = torch.ops.torchlpc.scan_cpu(impulse, decay, initial_state)
+                out = torch.ops.torchlpc.scan(impulse, decay, initial_state)
             else:
                 out = torch.from_numpy(
                     lpc_np(