Skip to content

Commit 4d2a878

Browse files
committed
test quantized lora
1 parent 8fb423c commit 4d2a878

File tree

1 file changed

+86
-9
lines changed

1 file changed

+86
-9
lines changed

.ci/scripts/test_lora.sh

Lines changed: 86 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,7 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
5353
HF_QWEN_PATH=$(python -c "from huggingface_hub import snapshot_download; print(snapshot_download('unsloth/Qwen3-0.6B'))")
5454
echo "Model downloaded to: $HF_QWEN_PATH"
5555

56-
<<<<<<< HEAD
5756
### BUILD LLAMA RUNNER.
58-
=======
59-
# Build llama runner.
60-
>>>>>>> 3c0898753d (qwen lora test)
6157
cmake_install_executorch_libraries
6258
cmake_build_llama_runner
6359

@@ -95,7 +91,7 @@ else
9591
fi
9692

9793
### PROGRAM DATA SEPARATION ###
98-
# Export LoRA PTE, LoRA PTD, foundation PTD file.
94+
# Export LoRA PTE, LoRA PTD, foundation PTD file.
9995
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
10096
--config examples/models/qwen3/config/qwen3_xnnpack.yaml \
10197
+base.adapter_checkpoint="${HF_ADAPTER_PATH}/adapter_model.safetensors" \
@@ -112,17 +108,98 @@ cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math.pte --dat
112108
NOW=$(date +"%H:%M:%S")
113109
echo "Finished at ${NOW}"
114110

115-
RESULT2=$(cat result2.txt)
116-
if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
111+
RESULT=$(cat result.txt)
112+
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
117113
echo "Expected result prefix: ${EXPECTED_PREFIX}"
118-
echo "Actual result: ${RESULT2}"
114+
echo "Actual result: ${RESULT}"
119115
echo "Success"
120116
else
121117
echo "Expected result prefix: ${EXPECTED_PREFIX}"
122-
echo "Actual result: ${RESULT2}"
118+
echo "Actual result: ${RESULT}"
123119
echo "Failure; results not the same"
124120
cleanup_files
125121
exit 1
126122
fi
127123

124+
### QUANTIZATION & PROGRAM DATA SEPARATION ###
125+
EXPECTED_QUANT_PREFIX="
126+
<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
127+
<think>
128+
Okay, so I need to calculate 15% of 80.
129+
"
130+
EXPECTED_QUANT_LORA_PREFIX="<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
131+
To calculate 15% of 80, we can multiply 80 by 15/100.
132+
So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
133+
#### 12
134+
The answer is: 12<|im_end|>
135+
"
136+
137+
# Export Quantized PTE, PTD file, no LoRA.
138+
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
139+
--config examples/models/qwen3/config/qwen3_xnnpack.yaml \
140+
+export.output_name="qwen_q.pte" \
141+
+export.foundation_weights_file="qwen_foundation_q.ptd" \
142+
+quantization.qmode="8da4w" \
143+
+quantization.group_size=32
144+
145+
# Export Quantized LoRA PTE, LoRA PTD, foundation PTD file.
146+
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
147+
--config examples/models/qwen3/config/qwen3_xnnpack.yaml \
148+
+base.adapter_checkpoint="${HF_ADAPTER_PATH}/adapter_model.safetensors" \
149+
+base.adapter_config="${HF_ADAPTER_PATH}/adapter_config.json" \
150+
+export.output_name="qwen_lora_math_q.pte" \
151+
+export.foundation_weights_file="qwen_foundation_lora_q.ptd" \
152+
+export.lora_weights_file="qwen_lora_math_q.ptd" \
153+
+quantization.qmode="8da4w" \
154+
+quantization.group_size=32
155+
156+
# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same.
157+
if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
158+
echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
159+
else
160+
echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
161+
cleanup_files
162+
exit 1
163+
fi
164+
165+
# Run quantized qwen model (no adapter).
166+
NOW=$(date +"%H:%M:%S")
167+
echo "Starting to run llama runner at ${NOW}"
168+
# shellcheck source=/dev/null
169+
cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths="qwen_foundation_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
170+
NOW=$(date +"%H:%M:%S")
171+
echo "Finished at ${NOW}"
172+
RESULT=$(cat result.txt)
173+
if [[ "${RESULT}" == "${EXPECTED_QUANT_PREFIX}"* ]]; then
174+
echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
175+
echo "Actual result: ${RESULT}"
176+
echo "Success"
177+
else
178+
echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
179+
echo "Actual result: ${RESULT}"
180+
echo "Failure; results not the same"
181+
cleanup_files
182+
exit 1
183+
fi
184+
185+
# Run quantized lora adapter.
186+
NOW=$(date +"%H:%M:%S")
187+
echo "Starting to run llama runner at ${NOW}"
188+
# shellcheck source=/dev/null
189+
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths="qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
190+
NOW=$(date +"%H:%M:%S")
191+
echo "Finished at ${NOW}"
192+
193+
RESULT=$(cat result.txt)
194+
if [[ "${RESULT}" == "${EXPECTED_QUANT_LORA_PREFIX}"* ]]; then
195+
echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
196+
echo "Actual result: ${RESULT}"
197+
echo "Success"
198+
else
199+
echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
200+
echo "Actual result: ${RESULT}"
201+
echo "Failure; results not the same"
202+
cleanup_files
203+
exit 1
204+
fi
128205
cleanup_files

0 commit comments

Comments
 (0)