[Opt] Manipulation of Q8_0 tensors with Tornado ByteArrays #151

Workflow file for this run
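# Builds TornadoVM (OpenCL and PTX backends) and GPULlama3.java, then runs FP16 and Q8_0 GGUF models as smoke tests on a self-hosted GPU runner.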

name: GPULlama3 Build & Run
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    types: [opened, synchronize, reopened]
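# Fixed paths on the self-hosted runner: GraalVM JDK 21, TornadoVM checkout location, Graal compiler jars, and the GGUF model directory.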
env:
  JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
  TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm
  LLAMA_ROOT: ${{ github.workspace }}
  GRAAL_JARS: /opt/graalJars
  MODELS_DIR: /opt/models
jobs:
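  # Formatting gate; the Spotless check itself is currently commented out.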
  code-quality:
    runs-on: self-hosted
    timeout-minutes: 30
    steps:
      - name: Checkout GPULlama3
        uses: actions/checkout@v4
      - name: Check code formatting (Spotless)
        run: |
          cd ${{ github.workspace }}
          # ./mvnw -T12C -Pspotless spotless:check
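  # Builds TornadoVM for each backend in the matrix, builds GPULlama3.java against it, then runs FP16 and Q8_0 models end to end.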
  build-and-run:
    runs-on: [self-hosted]
    needs: code-quality
    timeout-minutes: 30
    strategy:
      fail-fast: true
      matrix:
        backend:
          - name: opencl
          - name: ptx
    steps:
      - name: Checkout GPULlama3
        uses: actions/checkout@v4
      - name: Clone TornadoVM master
        run: |
          git clone --depth 1 --branch master \
            https://github.com/beehive-lab/TornadoVM.git \
            $TORNADO_ROOT
      - name: Set up Python venv for TornadoVM
        run: |
          python3 -m venv $TORNADO_ROOT/venv
          source $TORNADO_ROOT/venv/bin/activate
          python --version
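      # Build the selected TornadoVM backend, locate the generated SDK, and export TORNADO_SDK/PATH for the remaining steps.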
      - name: Build TornadoVM
        run: |
          cd $TORNADO_ROOT
          mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
          source venv/bin/activate
          echo "=== Building TornadoVM ==="
          make BACKEND=${{ matrix.backend.name }}
          echo "=== Searching for TornadoVM SDK directory ==="
          SDK_DIR=$(find dist -maxdepth 3 -type d -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1)
          if [ -z "$SDK_DIR" ]; then
            echo "::error::Could not locate TornadoVM SDK directory!"
            find dist -maxdepth 5 -type d
            exit 1
          fi
          FULL_SDK="${PWD}/${SDK_DIR}"
          echo "Detected TornadoVM SDK: $FULL_SDK"
          # Export for current shell session
          export TORNADO_SDK="$FULL_SDK"
          export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"
          # Save for subsequent steps
          echo "TORNADO_SDK=$FULL_SDK" >> $GITHUB_ENV
          echo "PATH=$PATH" >> $GITHUB_ENV
          echo "=== Checking tornado CLI ==="
          which tornado || { echo "::error::tornado not in PATH"; exit 1; }
          tornado --devices
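      # Package GPULlama3.java (tests skipped) with the tornado CLI from the SDK exported above.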
      - name: Build GPULlama3.java
        run: |
          cd ${{ github.workspace }}
          echo "Using TORNADO_SDK=$TORNADO_SDK"
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          tornado --version
          ./mvnw clean package -DskipTests
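      # FP16 smoke tests: run each half-precision GGUF model from MODELS_DIR with a short prompt.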
      - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
            --prompt "Say hello"
      - name: FP16 - Run Qwen3-4B-f16.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Qwen3-4B-f16.gguf \
            --prompt "Say hello"
      - name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \
            --prompt "Say hello"
      - name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \
            --prompt "Say hello"
      - name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
            --prompt "Say hello"
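      # Q8_0 smoke tests: run the quantized models, the tensor format targeted by this PR.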
      - name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \
            --prompt "Say hello"
      - name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \
            --prompt "Say hello"
      - name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \
            --prompt "Say hello"
      - name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \
            --prompt "Say hello"
      - name: Q8 - Run Mistral-7B-Instruct-v0.3.Q8_0.gguf
        run: |
          cd ${{ github.workspace }}
          export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
          ./llama-tornado --gpu --${{ matrix.backend.name }} \
            --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
            --prompt "Say hello"