Skip to content

Commit de19e7e

Browse files
authored
Moving to uv instead of poetry. (#2919)
* Moving to `uv` instead of `poetry`. More in the standard, faster, seemingly better lockfile. * Creating venv if not created. * Create the venv. * Fix ? * Fixing the test by activating the environment ? * Install system ? * Add the cli entry point. * docker install on system * Monkeying this... * `--system` is redundant. * Trying to force-include this pb folder. * Trying to check that pb is imported correctly. * Editable install necessary ? * Non editable? * Editable it is.
1 parent d61f14f commit de19e7e

File tree

9 files changed

+3536
-4208
lines changed

9 files changed

+3536
-4208
lines changed

.github/workflows/tests.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,14 @@ jobs:
4444
run: |
4545
sudo apt update
4646
sudo apt install python3.11-dev -y
47+
pip install -U pip uv
48+
uv venv
49+
source ./.venv/bin/activate
4750
make install-cpu
4851
- name: Run server tests
4952
run: |
50-
pip install pytest
53+
source ./.venv/bin/activate
54+
uv pip install pytest
5155
export HF_TOKEN=${{ secrets.HF_TOKEN }}
5256
pytest -s -vv server/tests
5357
- name: Pre-commit checks

Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -224,17 +224,19 @@ COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-
224224
COPY --from=flashinfer-builder /opt/conda/lib/python3.11/site-packages/flashinfer/ /opt/conda/lib/python3.11/site-packages/flashinfer/
225225

226226
# Install flash-attention dependencies
227-
RUN pip install einops --no-cache-dir
227+
# RUN pip install einops --no-cache-dir
228228

229229
# Install server
230230
COPY proto proto
231231
COPY server server
232232
COPY server/Makefile server/Makefile
233+
ENV UV_SYSTEM_PYTHON=1
233234
RUN cd server && \
234235
make gen-server && \
235-
pip install -r requirements_cuda.txt && \
236-
pip install ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \
237-
pip install nvidia-nccl-cu12==2.22.3
236+
python -c "from text_generation_server.pb import generate_pb2" && \
237+
pip install -U pip uv && \
238+
uv pip install -e ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \
239+
uv pip install nvidia-nccl-cu12==2.22.3
238240

239241
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
240242
# Required to find libpython within the rust binaries

Dockerfile_amd

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ RUN case ${TARGETPLATFORM} in \
104104
/opt/conda/bin/conda clean -ya
105105

106106
# Install flash-attention, torch dependencies
107-
RUN python3 -m pip install --upgrade pip && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
107+
RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
108108

109109
RUN conda install mkl=2021
110110
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/
@@ -318,10 +318,11 @@ COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311
318318
COPY proto proto
319319
COPY server server
320320
COPY server/Makefile server/Makefile
321+
ENV UV_SYSTEM_PYTHON=1
321322
RUN cd server && \
322323
make gen-server && \
323-
pip install -r requirements_rocm.txt && \
324-
pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
324+
pip install -U pip uv && \
325+
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
325326

326327
# Install benchmarker
327328
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark

Dockerfile_intel

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,11 @@ RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
108108
COPY proto proto
109109
COPY server server
110110
COPY server/Makefile server/Makefile
111+
ENV UV_SYSTEM_PYTHON=1
111112
RUN cd server && \
112113
make gen-server && \
113-
pip install -r requirements_intel.txt && \
114-
pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
114+
pip install -U pip uv && \
115+
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
115116

116117
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
117118
ENV CCL_ZE_IPC_EXCHANGE=sockets
@@ -211,10 +212,11 @@ ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
211212
COPY proto proto
212213
COPY server server
213214
COPY server/Makefile server/Makefile
215+
ENV UV_SYSTEM_PYTHON=1
214216
RUN cd server && \
215217
make gen-server && \
216-
pip install -r requirements_intel.txt && \
217-
pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
218+
pip install -U pip uv && \
219+
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
218220

219221
# Install benchmarker
220222
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark

flake.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/Makefile

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,36 +9,29 @@ include Makefile-exllamav2
99
include Makefile-flashinfer
1010

1111
unit-tests:
12+
pip install -U pip uv
13+
uv pip install -e ".[dev]"
1214
pytest -s -vv -m "not private" tests
1315

1416
gen-server:
1517
# Compile protos
16-
pip install grpcio-tools==1.62.2 mypy-protobuf==3.6.0 'types-protobuf' --no-cache-dir
18+
pip install -U pip uv
19+
uv pip install ".[gen]"
1720
mkdir text_generation_server/pb || true
1821
python -m grpc_tools.protoc -I../proto/v3 --python_out=text_generation_server/pb \
1922
--grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/v3/generate.proto
2023
find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
2124
touch text_generation_server/pb/__init__.py
2225

2326
install-server: gen-server
24-
pip install pip --upgrade
25-
pip install -r requirements_cuda.txt
26-
pip install -e ".[accelerate, compressed-tensors, quantize, peft, outlines]"
27+
uv pip install -e ".[accelerate, compressed-tensors, quantize, peft, outlines]"
2728

2829

2930
install: install-cuda
3031
echo "Installed server"
3132

3233
install-cuda: install-server install-flash-attention-v2-cuda install-flash-attention
33-
pip install -e ".[attention,bnb,marlin,moe]"
34-
pip install nvidia-nccl-cu12==2.22.3
34+
uv pip install -e ".[attention,bnb,marlin,moe]"
35+
uv pip install nvidia-nccl-cu12==2.22.3
3536

3637
install-rocm: install-server install-flash-attention-v2-rocm install-vllm-rocm
37-
38-
run-dev:
39-
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
40-
41-
export-requirements:
42-
poetry export -o requirements_cuda.txt --without-hashes
43-
poetry export -o requirements_rocm.txt --without-hashes
44-
poetry export -o requirements_intel.txt --without-hashes

0 commit comments

Comments
 (0)