Skip to content

Commit ac497ce

Browse files
author
Raymond Li
committed
move setting of TORCH_CUDA_ARCH_LIST
1 parent 48c8046 commit ac497ce

File tree

3 files changed

+9
-11
lines changed

3 files changed

+9
-11
lines changed

megatron/fused_kernels/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1+
import os
12
import torch
23

34

5+
# Setting this param to a list has a problem of generating different
6+
# compilation commands (with different order of architectures) and
7+
# leading to recompilation of fused kernels. Set it to empty string
8+
# to avoid recompilation and assign arch flags explicitly in
9+
# extra_cuda_cflags below
10+
os.environ["TORCH_CUDA_ARCH_LIST"] = ""
11+
12+
413
def load(args):
514
if torch.version.hip is None:
615
print("running on CUDA devices")

megatron/fused_kernels/cuda/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
from torch.utils import cpp_extension
77
from megatron.fused_kernels.utils import _create_build_dir
88

9-
# Do not override TORCH_CUDA_ARCH_LIST to allow for pre-compilation in Dockerfile
10-
# os.environ["TORCH_CUDA_ARCH_LIST"] = ""
11-
129

1310
def load(args):
1411

megatron/fused_kernels/rocm/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,6 @@
1919
from megatron.fused_kernels.utils import _create_build_dir
2020

2121

22-
# Setting this param to a list has a problem of generating different
23-
# compilation commands (with different order of architectures) and
24-
# leading to recompilation of fused kernels. Set it to empty string
25-
# to avoid recompilation and assign arch flags explicitly in
26-
# extra_cuda_cflags below
27-
os.environ["TORCH_CUDA_ARCH_LIST"] = ""
28-
29-
3022
def load(args):
3123

3224
# Build path

0 commit comments

Comments
 (0)