
Commit ba764c2

Merge branch 'develop' into c/extend-python-version-test-coverage
2 parents: 1ed3d2f + 95ea1e0

26 files changed (+4753, -1317 lines)

.github/workflows/code_checks.yml

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4.2.2
       - name: Install uv
-        uses: astral-sh/setup-uv@v5.2.2
+        uses: astral-sh/setup-uv@v5.3.1
         with:
           # Install a specific version of uv.
           version: "0.5.21"
@@ -46,6 +46,6 @@ jobs:
           source .venv/bin/activate
           pre-commit run --all-files
       - name: pip-audit (gh-action-pip-audit)
-        uses: pypa/gh-action-pip-audit@v1.0.8
+        uses: pypa/gh-action-pip-audit@v1.1.0
         with:
           virtual-environment: .venv/
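The same checks can be reproduced before pushing; this is a local sketch, assuming the project environment is synced with uv as in the other workflows and that the pip-audit CLI (which the gh-action wraps) is installed:

```bash
# Build the project virtual environment, then run the same hooks the workflow runs.
uv sync --dev
source .venv/bin/activate
pre-commit run --all-files
# Audit the environment for known vulnerabilities (local counterpart of the CI step).
pip-audit
```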

.github/workflows/docker.yml

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+name: docker
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+      - main
+    paths:
+      - Dockerfile
+      - .github/workflows/docker.yml
+  pull_request:
+    branches:
+      - main
+      - develop
+    paths:
+      - Dockerfile
+      - .github/workflows/docker.yml
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4.2.2
+
+      - name: Extract vLLM version
+        id: vllm-version
+        run: |
+          VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804
+        with:
+          images: vectorinstitute/vector-inference
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
+          labels: ${{ steps.meta.outputs.labels }}
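A note on the version-extraction step: it assumes `uv.lock` contains a TOML package entry for vllm whose `version` field sits on the line directly after `name = "vllm"`. The fragment below is illustrative, not taken from this commit:

```bash
# Hypothetical uv.lock fragment (illustrative values):
#   [[package]]
#   name = "vllm"
#   version = "0.6.3"
# grep -A 1 prints the matching name line plus the line after it; the second grep
# keeps the version line; cut takes the second double-quote-delimited field ("0.6.3").
VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
```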

.github/workflows/docs_build.yml

Lines changed: 3 additions & 3 deletions
@@ -27,18 +27,18 @@ jobs:
       - uses: actions/checkout@v4.2.2

       - name: Install uv
-        uses: astral-sh/setup-uv@4db96194c378173c656ce18a155ffc14a9fc4355
+        uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231
         with:
           version: "0.5.21"
           enable-cache: true

       - name: "Set up Python"
-        uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38
+        uses: actions/setup-python@8039c45ed9a312fba91f3399cd0605ba2ebfe93c
         with:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --all-groups
+        run: uv sync --dev --group docs

       - name: Build docs
         run: cd docs && rm -rf source/reference/api/_autosummary && uv run make html
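The docs job can be reproduced locally with the same two commands; a sketch, assuming uv is installed and a `docs` dependency group is defined in pyproject.toml:

```bash
# Install only the dev and docs dependency groups (the job no longer syncs all extras).
uv sync --dev --group docs
# Clear generated API stubs and rebuild the HTML docs, as the workflow does.
cd docs && rm -rf source/reference/api/_autosummary && uv run make html
```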

.github/workflows/docs_deploy.yml

Lines changed: 4 additions & 4 deletions
@@ -31,19 +31,19 @@ jobs:
           submodules: 'true'

       - name: Install uv
-        uses: astral-sh/setup-uv@4db96194c378173c656ce18a155ffc14a9fc4355
+        uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231
         with:
           # Install a specific version of uv.
           version: "0.5.21"
           enable-cache: true

       - name: "Set up Python"
-        uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38
+        uses: actions/setup-python@8039c45ed9a312fba91f3399cd0605ba2ebfe93c
         with:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --all-groups
+        run: uv sync --dev --group docs

       - name: Build docs
         run: |
@@ -53,7 +53,7 @@ jobs:
           touch build/html/.nojekyll

       - name: Deploy to Github pages
-        uses: JamesIves/github-pages-deploy-action@15de0f09300eea763baee31dff6c6184995c5f6a
+        uses: JamesIves/github-pages-deploy-action@6c2d9db40f9296374acc17b90404b6e8864128c8
         with:
           branch: github_pages
           folder: docs/build/html

.github/workflows/publish.yml

Lines changed: 3 additions & 3 deletions
@@ -12,16 +12,16 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev
-      - uses: actions/checkout@v4.1.1
+      - uses: actions/checkout@v4.2.2
       - name: Install poetry
         run: python3 -m pip install --upgrade pip && python3 -m pip install poetry
-      - uses: actions/setup-python@v5.0.0
+      - uses: actions/setup-python@v5.4.0
         with:
           python-version: '3.10'
       - name: Build package
         run: poetry build
       - name: Publish package
-        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc
         with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
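The packaging step can be checked locally before cutting a release; a sketch mirroring the job's own commands:

```bash
# Install poetry the same way the workflow does, then build the sdist and wheel into dist/.
python3 -m pip install --upgrade pip && python3 -m pip install poetry
poetry build
```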

.github/workflows/unit_tests.yml

Lines changed: 5 additions & 4 deletions
@@ -46,7 +46,7 @@ jobs:
       - uses: actions/checkout@v4.2.2

       - name: Install uv
-        uses: astral-sh/setup-uv@v5.2.2
+        uses: astral-sh/setup-uv@v5.3.1
         with:
           # Install a specific version of uv.
           version: "0.5.21"
@@ -58,17 +58,18 @@ jobs:
           python-version: ${{ matrix.python-version }}

       - name: Install the project
-        run: uv sync --all-extras --dev
+        run: uv sync --dev

       - name: Install dependencies and check code
         run: |
           uv run pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests

       # Uncomment this once this repo is configured on Codecov
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.3.1
+        uses: codecov/codecov-action@v5.4.0
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          slug: VectorInstitute/vec-inf
+          file: ./coverage.xml
+          name: codecov-umbrella
           fail_ci_if_error: true
           verbose: true
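Run locally, the same sequence produces the `coverage.xml` that the updated Codecov step now uploads explicitly; a sketch, assuming uv is installed:

```bash
# Sync only the dev group (the job no longer installs all extras), then run the unit tests.
uv sync --dev
# --cov-report=xml writes coverage.xml to the current directory, which is what
# `file: ./coverage.xml` points at in the workflow.
uv run pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
```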

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.9.6'
+    rev: 'v0.11.0'
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
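To exercise the bumped hook locally; a sketch, assuming pre-commit is installed (hook environments are rebuilt automatically when the pinned rev changes):

```bash
# Run only the ruff hook across the whole repository.
pre-commit run ruff --all-files
```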

Dockerfile

Lines changed: 15 additions & 42 deletions
@@ -12,27 +12,14 @@ ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
 # Set the Python version
 ARG PYTHON_VERSION=3.10.12

-# Install dependencies for building Python
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
-    wget \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libffi-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    liblzma-dev \
-    git \
-    vim \
+    wget build-essential libssl-dev zlib1g-dev libbz2-dev \
+    libreadline-dev libsqlite3-dev libffi-dev libncursesw5-dev \
+    xz-utils tk-dev libxml2-dev libxmlsec1-dev liblzma-dev git vim \
     && rm -rf /var/lib/apt/lists/*

-# Download and install Python from precompiled binaries
+# Install Python
 RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
     tar -xzf Python-$PYTHON_VERSION.tgz && \
     cd Python-$PYTHON_VERSION && \
@@ -42,38 +29,24 @@ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSIO
     cd .. && \
     rm -rf Python-$PYTHON_VERSION.tgz Python-$PYTHON_VERSION

-# Download and install pip using get-pip.py
+# Install pip and core Python tools
 RUN wget https://bootstrap.pypa.io/get-pip.py && \
     python3.10 get-pip.py && \
-    rm get-pip.py
+    rm get-pip.py && \
+    python3.10 -m pip install --upgrade pip setuptools wheel uv

-# Ensure pip for Python 3.10 is used
-RUN python3.10 -m pip install --upgrade pip setuptools wheel
-
-# Install Poetry using Python 3.10
-RUN python3.10 -m pip install poetry
-
-# Don't create venv
-RUN poetry config virtualenvs.create false
-
-# Set working directory
+# Set up project
 WORKDIR /vec-inf
-
-# Copy current directory
 COPY . /vec-inf

-# Update Poetry lock file if necessary
-RUN poetry lock
-
-# Install vec-inf
-RUN poetry install --extras "dev"
-
-# Install Flash Attention 2 backend
+# Install project dependencies with build requirements
+RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu121" uv pip install --system -e .[dev]
+# Install Flash Attention
 RUN python3.10 -m pip install flash-attn --no-build-isolation

-# Move nccl to accessible location
-RUN mkdir -p /vec-inf/nccl
-RUN mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1;
+# Final configuration
+RUN mkdir -p /vec-inf/nccl && \
+    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1

 # Set the default command to start an interactive shell
 CMD ["bash"]
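For reference, the image this Dockerfile produces can be built and exercised locally in the shape the docker workflow pushes it; a sketch, where the tag stands in for whatever vLLM version the workflow extracts from uv.lock and GPU access assumes the NVIDIA container toolkit:

```bash
# Build from the repository root; the tag mirrors the workflow's
# vectorinstitute/vector-inference:<vllm-version> scheme ("0.6.3" is illustrative).
docker build -t vectorinstitute/vector-inference:0.6.3 .
# The image's default CMD is "bash", so running it drops into an interactive shell.
docker run --rm -it --gpus all vectorinstitute/vector-inference:0.6.3
```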

README.md

Lines changed: 62 additions & 8 deletions
@@ -3,36 +3,90 @@
 ----------------------------------------------------

 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
-[![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs_build.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs_build.yml)
-[![codecov](https://codecov.io/github/VectorInstitute/vector-inference/graph/badge.svg?token=83MYFZ3UPA)](https://codecov.io/github/VectorInstitute/vector-inference)
+[![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs_deploy.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs_deploy.yml)
+[![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/develop/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/develop)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository runs natively on the Vector Institute cluster environment**. To adapt to other environments, update [`launch_server.sh`](vec_inf/launch_server.sh), [`vllm.slurm`](vec_inf/vllm.slurm), [`multinode_vllm.slurm`](vec_inf/multinode_vllm.slurm) and [`models.csv`](vec_inf/models/models.csv) accordingly.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository runs natively on the Vector Institute cluster environment**. To adapt to other environments, update [`launch_server.sh`](vec_inf/launch_server.sh), [`vllm.slurm`](vec_inf/vllm.slurm), [`multinode_vllm.slurm`](vec_inf/multinode_vllm.slurm) and [`models.csv`](vec_inf/config/models.yaml) accordingly.

 ## Installation
 If you are using the Vector cluster environment, and you don't need any customization to the inference server environment, run the following to install package:
+
 ```bash
 pip install vec-inf
 ```
 Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package

-## Launch an inference server
+## Usage
+
 ### `launch` command
+
+The `launch` command allows users to deploy a model as a slurm job. If the job successfully launches, a URL endpoint is exposed for
+the user to send requests for inference.
+
 We will use the Llama 3.1 model as example, to launch an OpenAI compatible inference server for Meta-Llama-3.1-8B-Instruct, run:
+
 ```bash
 vec-inf launch Meta-Llama-3.1-8B-Instruct
 ```
 You should see an output like the following:

 <img width="600" alt="launch_img" src="https://github.com/user-attachments/assets/ab658552-18b2-47e0-bf70-e539c3b898d5">

-The model would be launched using the [default parameters](vec_inf/models/models.csv), you can override these values by providing additional parameters, use `--help` to see the full list. You can also launch your own customized model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html), and make sure to follow the instructions below:
+#### Overrides
+
+Models that are already supported by `vec-inf` would be launched using the [default parameters](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be
+overriden. For example, if `qos` is to be overriden:
+
+```bash
+vec-inf launch Meta-Llama-3.1-8B-Instruct --qos <new_qos>
+```
+
+#### Custom models
+
+You can also launch your own custom model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html), and make sure to follow the instructions below:
 * Your model weights directory naming convention should follow `$MODEL_FAMILY-$MODEL_VARIANT`.
-* Your model weights directory should contain HF format weights.
-* The following launch parameters will conform to default value if not specified: `--max-num-seqs`, `--partition`, `--data-type`, `--venv`, `--log-dir`, `--model-weights-parent-dir`, `--pipeline-parallelism`, `--enforce-eager`. All other launch parameters need to be specified for custom models.
-* Example for setting the model weights parent directory: `--model-weights-parent-dir /h/user_name/my_weights`.
+* Your model weights directory should contain HuggingFace format weights.
+* You should create a custom configuration file for your model and specify its path via setting the environment variable `VEC_INF_CONFIG`
+Check the [default parameters](vec_inf/config/models.yaml) file for the format of the config file. All the parameters for the model
+should be specified in that config file.
 * For other model launch parameters you can reference the default values for similar models using the [`list` command ](#list-command).

+Here is an example to deploy a custom [Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M) model which is not
+supported in the default list of models using a user custom config. In this case, the model weights are assumed to be downloaded to
+a `model-weights` directory inside the user's home directory. The weights directory of the model follows the naming convention so it
+would be named `Qwen2.5-7B-Instruct-1M`. The following yaml file would need to be created, lets say it is named `/h/<username>/my-model-config.yaml`.
+
+```yaml
+models:
+  Qwen2.5-7B-Instruct-1M:
+    model_family: Qwen2.5
+    model_variant: 7B-Instruct-1M
+    model_type: LLM
+    num_gpus: 2
+    num_nodes: 1
+    vocab_size: 152064
+    max_model_len: 1010000
+    max_num_seqs: 256
+    pipeline_parallelism: true
+    enforce_eager: false
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    data_type: auto
+    venv: singularity
+    log_dir: default
+    model_weights_parent_dir: /h/<username>/model-weights
+```
+
+You would then set the `VEC_INF_CONFIG` path using:
+
+```bash
+export VEC_INF_CONFIG=/h/<username>/my-model-config.yaml
+```
+
+Alternatively, you can also use launch parameters to set these values instead of using a user-defined config.
+
 ### `status` command
 You can check the inference server status by providing the Slurm job ID to the `status` command:
 ```bash
codecov.yml

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 codecov:
+  branch: develop
   require_ci_to_pass: true
   notify:
     after_n_builds: 2
