Skip to content

Commit 5cb259e

Browse files
committed
Merge branch 'f/sglang-support' of https://github.com/VectorInstitute/vector-inference into f/sglang-support
2 parents c73e163 + f94dd84 commit 5cb259e

File tree

5 files changed

+50
-9
lines changed

5 files changed

+50
-9
lines changed

.github/workflows/docker.yml

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ on:
1313
pull_request:
1414
branches:
1515
- main
16+
- f/sglang-support
1617
paths:
1718
- Dockerfile
1819
- .github/workflows/docker.yml
@@ -22,8 +23,7 @@ jobs:
2223
push_to_registry:
2324
name: Push Docker image to Docker Hub
2425
runs-on:
25-
- self-hosted
26-
- docker
26+
- ubuntu-latest
2727
steps:
2828
- name: Checkout repository
2929
uses: actions/checkout@v5.0.0
@@ -34,6 +34,24 @@ jobs:
3434
VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
3535
echo "version=$VERSION" >> $GITHUB_OUTPUT
3636
37+
- name: Maximize build space
38+
run: |
39+
echo "Disk space before cleanup:"
40+
df -h
41+
# Remove unnecessary pre-installed software
42+
sudo rm -rf /usr/share/dotnet
43+
sudo rm -rf /usr/local/lib/android
44+
sudo rm -rf /opt/ghc
45+
sudo rm -rf /opt/hostedtoolcache/CodeQL
46+
sudo rm -rf /usr/local/share/boost
47+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
48+
# Clean apt cache
49+
sudo apt-get clean
50+
# Remove docker images
51+
docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
52+
echo "Disk space after cleanup:"
53+
df -h
54+
3755
- name: Set up Docker Buildx
3856
uses: docker/setup-buildx-action@v3
3957

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
- id: check-toml
1818

1919
- repo: https://github.com/astral-sh/ruff-pre-commit
20-
rev: 'v0.14.4'
20+
rev: 'v0.14.5'
2121
hooks:
2222
- id: ruff
2323
args: [--fix, --exit-non-zero-on-fix]

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ WORKDIR /vec-inf
5757
COPY . /vec-inf
5858

5959
# Install project dependencies with build requirements
60-
RUN uv pip install --system -e .[dev] --prerelease=allow
60+
# Use --no-cache to prevent uv from storing both downloaded and extracted packages
61+
RUN uv pip install --system -e .[dev] --prerelease=allow --no-cache && \
62+
rm -rf /root/.cache/uv /tmp/*
6163

6264
# Install a single, system NCCL (from NVIDIA CUDA repo in base image)
6365
RUN apt-get update && apt-get install -y --allow-change-held-packages\

vec_inf/config/models.yaml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,12 +1059,24 @@ models:
10591059
model_family: gpt-oss
10601060
model_variant: 120b
10611061
model_type: LLM
1062-
gpus_per_node: 4
1063-
num_nodes: 2
1062+
gpus_per_node: 2
1063+
num_nodes: 1
10641064
vocab_size: 201088
10651065
time: 08:00:00
10661066
resource_type: l40s
10671067
vllm_args:
1068+
--tensor-parallel-size: 2
1069+
--max-model-len: 32768
1070+
Llama-4-Maverick-17B-128E-Instruct:
1071+
model_family: Llama-4
1072+
model_variant: Maverick-17B-128E-Instruct
1073+
model_type: VLM
1074+
gpus_per_node: 4
1075+
num_nodes: 8
1076+
vocab_size: 202048
1077+
time: 03:00:00
1078+
resource_type: l40s
1079+
vllm_args:
1080+
--max-model-len: 16384
10681081
--tensor-parallel-size: 4
1069-
--pipeline-parallel-size: 2
1070-
--max-model-len: 40960
1082+
--pipeline-parallel-size: 8

vec_inf/find_port.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,16 @@ find_available_port() {
2828
local base_port=$2
2929
local max_port=$3
3030

31-
for ((port=base_port; port<=max_port; port++)); do
31+
# Generate a shuffled list of ports; fall back to a sequential list if shuf is not present
32+
if command -v shuf >/dev/null 2>&1; then
33+
local port_list
34+
port_list=$(shuf -i "${base_port}-${max_port}")
35+
else
36+
local port_list
37+
port_list=$(seq $base_port $max_port)
38+
fi
39+
40+
for port in $port_list; do
3241
if is_port_available $ip $port; then
3342
echo $port
3443
return

0 commit comments

Comments
 (0)