chore: update Dockerfile to use Python base image and add kwaak configuration file

kwaak · kwaak · commit 675515705b22 · 2025-03-18T18:26:32.000Z
diff --git a/Dockerfile b/Dockerfile
@@ -1,52 +1,12 @@
-FROM nvidia/cuda:12.3.1-devel-ubuntu20.04
+FROM python:3.10
 
-# Non-interactive apt-get commands
-ARG DEBIAN_FRONTEND=noninteractive
+# kwaak's tool executor needs git, ripgrep (rg) and fd inside the image.
+# Debian installs fd as `fdfind`, so expose it under the expected `fd` name.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends fd-find git ripgrep \
+    && ln -s "$(command -v fdfind)" /usr/local/bin/fd \
+    && rm -rf /var/lib/apt/lists/*
 
-# No GPUs visible during build
-ARG CUDA_VISIBLE_DEVICES=none
+COPY . /app
 
-# Specify CUDA architectures -> 7.5: RTX 6000 & T4, 8.0: A100, 8.6+PTX
-ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
-
-# Set the Python version
-ARG PYTHON_VERSION=3.10.12
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
-    wget build-essential libssl-dev zlib1g-dev libbz2-dev \
-    libreadline-dev libsqlite3-dev libffi-dev libncursesw5-dev \
-    xz-utils tk-dev libxml2-dev libxmlsec1-dev liblzma-dev git vim \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Python
-RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
-    tar -xzf Python-$PYTHON_VERSION.tgz && \
-    cd Python-$PYTHON_VERSION && \
-    ./configure --enable-optimizations && \
-    make -j$(nproc) && \
-    make altinstall && \
-    cd .. && \
-    rm -rf Python-$PYTHON_VERSION.tgz Python-$PYTHON_VERSION
-
-# Install pip and core Python tools
-RUN wget https://bootstrap.pypa.io/get-pip.py && \
-    python3.10 get-pip.py && \
-    rm get-pip.py && \
-    python3.10 -m pip install --upgrade pip setuptools wheel uv
-
-# Set up project
-WORKDIR /vec-inf
-COPY . /vec-inf
-
-# Install project dependencies with build requirements
-RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu121" uv pip install --system -e .[dev]
-# Install Flash Attention
-RUN python3.10 -m pip install flash-attn --no-build-isolation
-
-# Final configuration
-RUN mkdir -p /vec-inf/nccl && \
-    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
-
-# Set the default command to start an interactive shell
-CMD ["bash"]
+WORKDIR /app
diff --git a/kwaak.toml b/kwaak.toml
@@ -0,0 +1,71 @@
+# This is a prefilled template for `kwaak`
+#
+# Several assumptions and defaults have been filled in. For proper usage, please customize the values to your needs.
+project_name = "vector-inference"
+language = "Python"
+
+## If you are using OpenAI, set the api key here
+openai_api_key = "env:OPENAI_API_KEY"
+
+
+## If you are using Anthropic, set the api key here
+#anthropic_api_key = "env:ANTHROPIC_API_KEY"
+
+
+## Optional: Connect kwaak to GitHub to create PRs, search code, and automatically push to a remote
+github_api_key = "env:GITHUB_TOKEN"
+
+
+## Optional: Connect kwaak to tavily to enable it to search the web
+#tavily_api_key = "env:TAVILY_API_KEY"
+
+## Commands the agent uses for tools
+## Kwaak can use tests, coverage, and lints to verify generated code.
+## At the moment, the format of the output does not matter.
+[commands]
+## Optional: Allows an agent to run tests. Recommended.
+# Example: test = "cargo test --no-fail-fast --color=never"
+#test = "<YOUR TEST COMMAND>"
+## Optional: Allows an agent to run coverage. The coverage command should run the tests and output the coverage results to stdout.
+# Example: coverage = "cargo llvm-cov --no-clean --summary-only"
+#coverage = "<YOUR COVERAGE COMMAND>"
+## Optional: Lint and fix command. This command is run after each completion cycle, before committing the code.
+# Recommended to use, as it avoids the LLM getting distracted by linting issues
+# Example: lint_and_fix = "cargo clippy --fix --allow-dirty --allow-staged && cargo fmt"
+#lint_and_fix = "<YOUR LINT AND FIX COMMAND>"
+
+## Git and GitHub configuration
+#
+## Kwaak can create and update PRs on GitHub, search GitHub code, and interact with the git repository. This requires a GitHub token.
+## If you leave the token empty, kwaak will not create PRs.
+[git]
+main_branch = "develop"
+owner = "VectorInstitute"
+repository = "vector-inference"
+auto_push_remote = false
+
+## Kwaak uses different LLMs for different tasks. As a rule of thumb, tasks that happen often (like indexing, summarizing) require a small, fast model
+## and tasks that happen less often (like completion) can use a larger, more accurate model.
+#
+## You can overwrite the api key and base url per kind of task if needed.
+[llm.indexing]
+provider = "OpenAI"
+prompt_model = "gpt-4o-mini"
+[llm.query]
+provider = "OpenAI"
+prompt_model = "gpt-4o"
+[llm.embedding]
+provider = "OpenAI"
+embedding_model = "text-embedding-3-large"
+## Docker configuration
+## kwaak requires a Dockerfile for the tool execution environment.
+## Besides the dependencies to run the code, there are several additional dependencies:
+## - `git` for interacting with the codebase
+## - `rg` (ripgrep) for searching the codebase
+## - `fd` (fd) for effective file searching
+##
+## In the future, an executor is planned that does not have these dependencies, but for now, they are required.
+##
+## If your project already has a Dockerfile and you want to keep it clean, you can specify a different file to use.
+[docker]
+dockerfile = "Dockerfile"