Skip to content

Commit 15cb80a

Browse files
committed
Update base image and CUDA arch list for new hardware and the new cluster; remove the flash-attn installation, since vLLM now bundles its own; remove FlashInfer due to a torch version conflict with the latest vLLM
1 parent aa7800f commit 15cb80a

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

Dockerfile

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
1-
FROM nvidia/cuda:12.4.1-devel-ubuntu20.04
1+
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
22

33
# Non-interactive apt-get commands
44
ARG DEBIAN_FRONTEND=noninteractive
55

66
# No GPUs visible during build
77
ARG CUDA_VISIBLE_DEVICES=none
88

9-
# Specify CUDA architectures -> 7.5: RTX 6000 & T4, 8.0: A100, 8.6+PTX
10-
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
9+
# Specify CUDA architectures -> 7.5: Quadro RTX 6000 & T4, 8.0: A100, 8.6: A40, 8.9: L40S, 9.0: H100
10+
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"
1111

1212
# Set the Python version
1313
ARG PYTHON_VERSION=3.10.12
@@ -41,10 +41,6 @@ COPY . /vec-inf
4141

4242
# Install project dependencies with build requirements
4343
RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu121" uv pip install --system -e .[dev]
44-
# Install FlashAttention
45-
RUN python3.10 -m pip install flash-attn --no-build-isolation
46-
# Install FlashInfer
47-
RUN python3.10 -m pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6/
4844

4945
# Final configuration
5046
RUN mkdir -p /vec-inf/nccl && \

0 commit comments

Comments (0)