Skip to content

Commit d082732

Browse files
committed
Split vllm and sglang dependencies into 2 groups and 2 docker images
1 parent 5cb259e commit d082732

File tree

5 files changed

+2948
-1193
lines changed

5 files changed

+2948
-1193
lines changed

.github/workflows/docker.yml

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,36 @@ on:
77
branches:
88
- main
99
paths:
10-
- Dockerfile
10+
- vllm.Dockerfile
11+
- sglang.Dockerfile
1112
- .github/workflows/docker.yml
1213
- uv.lock
1314
pull_request:
1415
branches:
1516
- main
1617
- f/sglang-support
1718
paths:
18-
- Dockerfile
19+
- vllm.Dockerfile
20+
- sglang.Dockerfile
1921
- .github/workflows/docker.yml
2022
- uv.lock
2123

2224
jobs:
2325
push_to_registry:
24-
name: Push Docker image to Docker Hub
26+
name: Build and push Docker images
2527
runs-on:
2628
- ubuntu-latest
29+
strategy:
30+
matrix:
31+
backend: [vllm, sglang]
2732
steps:
2833
- name: Checkout repository
2934
uses: actions/checkout@v5.0.0
3035

31-
- name: Extract vLLM version
32-
id: vllm-version
36+
- name: Extract backend version
37+
id: backend-version
3338
run: |
34-
VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
39+
# Anchor on line starts and keep only the first hit so VERSION is always a single value, even if name = "<backend>" also appears inside dependency lists in uv.lock.
VERSION=$(grep -A 1 "^name = \"${{ matrix.backend }}\"" uv.lock | grep '^version' | head -n 1 | cut -d '"' -f 2)
3540
echo "version=$VERSION" >> $GITHUB_OUTPUT
3641
3742
- name: Maximize build space
@@ -65,15 +70,16 @@ jobs:
6570
id: meta
6671
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
6772
with:
68-
images: vectorinstitute/vector-inference
73+
images: vectorinstitute/vector-inference-${{ matrix.backend }}
6974

7075
- name: Build and push Docker image
7176
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
7277
with:
7378
context: .
74-
file: ./Dockerfile
79+
file: ./${{ matrix.backend }}.Dockerfile
7580
push: true
7681
tags: |
7782
${{ steps.meta.outputs.tags }}
78-
vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
83+
vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }}
84+
vectorinstitute/vector-inference-${{ matrix.backend }}:latest
7985
labels: ${{ steps.meta.outputs.labels }}

pyproject.toml

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,38 @@ docs = [
3636
"pymdown-extensions>=10.7.1",
3737
"mike>=2.0.0",
3838
]
39-
40-
[project.optional-dependencies]
41-
dev = [
39+
# Common inference dependencies (shared by both backends)
40+
inference = [
4241
"xgrammar>=0.1.11",
4342
"torch>=2.7.0",
44-
"vllm>=0.10.0",
45-
"ray[default]>=2.50.0",
4643
"cupy-cuda12x>=12.3.0",
4744
"flashinfer-python>=0.4.0",
48-
"sglang>=0.5.0",
49-
"orjson>=3.11.0",
45+
]
46+
47+
[project.optional-dependencies]
48+
# vLLM inference backend (mutually exclusive with the sglang extra because their dependency versions conflict)
49+
# Install with: uv sync --extra vllm --group inference
50+
vllm = [
51+
"vllm>=0.11.2",
52+
"ray[default]>=2.51.0",
53+
]
54+
# SGLang inference backend (mutually exclusive with the vllm extra because their dependency versions conflict)
55+
# Install with: uv sync --extra sglang --group inference
56+
sglang = [
57+
"sglang>=0.5.5",
5058
"sgl_kernel>=0.3.0",
51-
"torchao>=0.14.0"
59+
"orjson>=3.11.0",
60+
"torchao>=0.9.0",
61+
]
62+
63+
[tool.uv]
64+
# Declare that vllm and sglang extras are mutually exclusive
65+
# This prevents uv from trying to resolve both together in the lock file
66+
conflicts = [
67+
[
68+
{ extra = "vllm" },
69+
{ extra = "sglang" },
70+
],
5271
]
5372

5473
[project.scripts]

sglang.Dockerfile

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
2+
3+
# Non-interactive apt-get commands
4+
ARG DEBIAN_FRONTEND=noninteractive
5+
6+
# No GPUs visible during build
7+
ARG CUDA_VISIBLE_DEVICES=none
8+
9+
# Specify CUDA architectures -> 7.5: Quadro RTX 6000 & T4, 8.0: A100, 8.6: A40, 8.9: L40S, 9.0: H100
10+
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"
11+
12+
# Set the Python version
13+
ARG PYTHON_VERSION=3.12.12
14+
15+
# Install system dependencies
16+
RUN apt-get update && apt-get install -y \
17+
wget build-essential libssl-dev zlib1g-dev libbz2-dev \
18+
libreadline-dev libsqlite3-dev libffi-dev libncursesw5-dev \
19+
xz-utils tk-dev libxml2-dev libxmlsec1-dev liblzma-dev libnuma1 \
20+
git vim \
21+
&& rm -rf /var/lib/apt/lists/*
22+
23+
# Install Python
24+
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
25+
tar -xzf Python-$PYTHON_VERSION.tgz && \
26+
cd Python-$PYTHON_VERSION && \
27+
./configure --enable-optimizations && \
28+
make -j$(nproc) && \
29+
make altinstall && \
30+
cd .. && \
31+
rm -rf Python-$PYTHON_VERSION.tgz Python-$PYTHON_VERSION
32+
33+
# Install pip and core Python tools
34+
RUN wget https://bootstrap.pypa.io/get-pip.py && \
35+
python3.12 get-pip.py && \
36+
rm get-pip.py && \
37+
# --no-cache-dir keeps pip's download cache out of the image layer
python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel uv
38+
39+
# Install RDMA support
40+
RUN apt-get update && apt-get install -y \
41+
libibverbs1 libibverbs-dev ibverbs-utils \
42+
librdmacm1 librdmacm-dev rdmacm-utils \
43+
rdma-core ibverbs-providers infiniband-diags perftest \
44+
&& rm -rf /var/lib/apt/lists/*
45+
46+
# Set up RDMA environment (these will persist in the final container)
47+
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
48+
ENV NCCL_IB_DISABLE=0
49+
ENV NCCL_SOCKET_IFNAME="^lo,docker0"
50+
ENV NCCL_NET_GDR_LEVEL=PHB
51+
ENV NCCL_IB_TIMEOUT=22
52+
ENV NCCL_IB_RETRY_CNT=7
53+
ENV NCCL_DEBUG=INFO
54+
55+
# Set up project
56+
WORKDIR /vec-inf
57+
COPY . /vec-inf
58+
59+
# Install project dependencies with sglang backend and inference group
60+
# Use --no-cache to prevent uv from storing both downloaded and extracted packages
61+
# Quote the requirement so the shell cannot expand ".[sglang]" as a bracket glob
RUN uv pip install --system -e ".[sglang]" --group inference --prerelease=allow --no-cache && \
62+
rm -rf /root/.cache/uv /tmp/*
63+
64+
# Install a single system-wide NCCL (from the NVIDIA CUDA apt repository configured in the base image)
65+
# --allow-change-held-packages lets apt replace held packages (presumably the base image holds its NCCL packages; confirm).
# The space before the trailing backslash keeps the flag separate from the package names on the continued line.
RUN apt-get update && apt-get install -y --allow-change-held-packages \
66+
libnccl2 libnccl-dev \
67+
&& rm -rf /var/lib/apt/lists/*
68+
69+
# Set the default command to start an interactive shell
70+
CMD ["bash"]

0 commit comments

Comments
 (0)