move setting of TORCH_CUDA_ARCH_LIST

Raymond Li · Raymond Li · commit ac497ce61581 · 2023-06-13T21:46:04.000Z
diff --git a/megatron/fused_kernels/__init__.py b/megatron/fused_kernels/__init__.py
@@ -1,6 +1,15 @@
+import os
 import torch
 
 
+# Setting this param to a list has a problem of generating different
+# compilation commands (with different order of architectures) and
+# leading to recompilation of fused kernels. Set it to empty string
+# to avoid recompilation and assign arch flags explicitly in
+# extra_cuda_cflags below
+os.environ["TORCH_CUDA_ARCH_LIST"] = ""
+
+
 def load(args):
     if torch.version.hip is None:
         print("running on CUDA devices")
diff --git a/megatron/fused_kernels/cuda/__init__.py b/megatron/fused_kernels/cuda/__init__.py
@@ -6,9 +6,6 @@
 from torch.utils import cpp_extension
 from megatron.fused_kernels.utils import _create_build_dir
 
-# Do not override TORCH_CUDA_ARCH_LIST to allow for pre-compilation in Dockerfile
-# os.environ["TORCH_CUDA_ARCH_LIST"] = ""
-
 
 def load(args):
 
diff --git a/megatron/fused_kernels/rocm/__init__.py b/megatron/fused_kernels/rocm/__init__.py
@@ -19,14 +19,6 @@
 from megatron.fused_kernels.utils import _create_build_dir
 
 
-# Setting this param to a list has a problem of generating different
-# compilation commands (with diferent order of architectures) and
-# leading to recompilation of fused kernels. Set it to empty string
-# to avoid recompilation and assign arch flags explicity in
-# extra_cuda_cflags below
-os.environ["TORCH_CUDA_ARCH_LIST"] = ""
-
-
 def load(args):
 
     # Build path