Skip to content

Commit 56c9b2c

Browse files
authored
Qwen lora quantize test (#16188)
Add a test for quantized LoRA. Expected file sizes: ``` full file: -rw-r--r-- 1 lfq users 3051988096 Dec 10 11:34 qwen_lora_math_full.pte program-data separated: -rw-r--r-- 1 lfq users 2388436736 Dec 9 12:20 qwen_foundation.ptd -rw-r--r-- 1 lfq users 40430544 Dec 9 12:20 qwen_lora_math.ptd -rw-r--r-- 1 lfq users 810064 Dec 9 12:20 qwen_lora_math.pte quantized: -rw-r--r-- 1 lfq users 962094448 Dec 10 14:16 qwen_foundation_lora_q.ptd -rw-r--r-- 1 lfq users 40430544 Dec 10 14:16 qwen_lora_math_q.ptd -rw-r--r-- 1 lfq users 874912 Dec 10 14:16 qwen_lora_math_q.pte quantized, no lora adapter: -rw-r--r-- 1 lfq users 962094448 Dec 10 13:52 qwen_foundation_q.ptd -rw-r--r-- 1 lfq users 635552 Dec 10 13:52 qwen_q.pte ``` The quantized LoRA weights file is the same size as the non-quantized one (40430544 bytes): the adapter rank is 16, and quantization for XNNPACK only works for group_size >= 32, so the LoRA weights are left unquantized.
1 parent 47851f9 commit 56c9b2c

File tree

1 file changed

+110
-10
lines changed

1 file changed

+110
-10
lines changed

.ci/scripts/test_lora.sh

Lines changed: 110 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ The answer is: 12<|im_end|>"
7070

7171
# Run llama runner on single lora PTE file.
7272
NOW=$(date +"%H:%M:%S")
73-
echo "Starting to run llama runner at ${NOW}"
73+
echo "Test 1: Single lora file. Starting to run llama runner at ${NOW}"
7474
# shellcheck source=/dev/null
7575
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_full.pte --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
7676
NOW=$(date +"%H:%M:%S")
@@ -81,11 +81,11 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
8181
echo "Expected result prefix: ${EXPECTED_PREFIX}"
8282
echo "Actual result: ${RESULT}"
8383
# Do not clean up files if test passes, as they're re-used in the next test.
84-
echo "Success"
84+
echo "Test 1: Success"
8585
else
8686
echo "Expected result prefix: ${EXPECTED_PREFIX}"
8787
echo "Actual result: ${RESULT}"
88-
echo "Failure; results not the same"
88+
echo "Test 1: Failure; results not the same"
8989
cleanup_files
9090
exit 1
9191
fi
@@ -102,23 +102,123 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
102102

103103
# Run llama runner on PTE, PTD files.
104104
NOW=$(date +"%H:%M:%S")
105-
echo "Starting to run llama runner at ${NOW}"
105+
echo "Test 2: Program data separation lora. Starting to run llama runner at ${NOW}"
106106
# shellcheck source=/dev/null
107107
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math.pte --data_paths="qwen_foundation.ptd,qwen_lora_math.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
108108
NOW=$(date +"%H:%M:%S")
109109
echo "Finished at ${NOW}"
110110

111-
RESULT2=$(cat result2.txt)
112-
if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
111+
# Test 2's runner output is redirected to result2.txt; read that file so the
# check validates this run rather than the output left over from Test 1.
RESULT=$(cat result2.txt)
112+
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
113113
echo "Expected result prefix: ${EXPECTED_PREFIX}"
114-
echo "Actual result: ${RESULT2}"
115-
echo "Success"
114+
echo "Actual result: ${RESULT}"
115+
echo "Test 2: Success"
116116
else
117117
echo "Expected result prefix: ${EXPECTED_PREFIX}"
118-
echo "Actual result: ${RESULT2}"
119-
echo "Failure; results not the same"
118+
echo "Actual result: ${RESULT}"
119+
echo "Test 2: Failure; results not the same"
120+
# Remove generated artifacts before failing, as every other failure branch does.
cleanup_files
121+
exit 1
122+
fi
123+
124+
# Confirm file sizes.
125+
FOUNDATION_SIZE=$(stat -c%s qwen_foundation.ptd)
126+
# Fail the test if the program-data-separated foundation weights file exceeds
# the 2.4GB threshold. FOUNDATION_SIZE holds the byte size of
# qwen_foundation.ptd, so the messages report that file name.
if [[ $FOUNDATION_SIZE -le "2400000000" ]]; then
127+
echo "qwen_foundation.ptd size is: $FOUNDATION_SIZE"
128+
else
129+
echo "qwen_foundation.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
130+
cleanup_files
131+
exit 1
132+
fi
133+
134+
### QUANTIZATION & PROGRAM DATA SEPARATION ###
135+
EXPECTED_QUANT_PREFIX="<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
136+
<think>
137+
Okay, so I need to calculate 15% of 80."
138+
EXPECTED_QUANT_LORA_PREFIX="
139+
<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
140+
To calculate 15% of 80, we can multiply 80 by 15/100.
141+
So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
142+
#### 12
143+
The answer is: 12<|im_end|>"
144+
145+
# Export Quantized PTE, PTD file, no LoRA.
146+
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
147+
--config examples/models/qwen3/config/qwen3_xnnpack.yaml \
148+
+export.output_name="qwen_q.pte" \
149+
+export.foundation_weights_file="qwen_foundation_q.ptd" \
150+
+quantization.qmode="8da4w" \
151+
+quantization.group_size=32
152+
153+
# Export Quantized LoRA PTE, LoRA PTD, foundation PTD file.
154+
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
155+
--config examples/models/qwen3/config/qwen3_xnnpack.yaml \
156+
+base.adapter_checkpoint="${HF_ADAPTER_PATH}/adapter_model.safetensors" \
157+
+base.adapter_config="${HF_ADAPTER_PATH}/adapter_config.json" \
158+
+export.output_name="qwen_lora_math_q.pte" \
159+
+export.foundation_weights_file="qwen_foundation_lora_q.ptd" \
160+
+export.lora_weights_file="qwen_lora_math_q.ptd" \
161+
+quantization.qmode="8da4w" \
162+
+quantization.group_size=32
163+
164+
# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same.
165+
if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
166+
echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
167+
else
168+
echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
169+
cleanup_files
170+
exit 1
171+
fi
172+
173+
# Run quantized qwen model (no adapter).
174+
NOW=$(date +"%H:%M:%S")
175+
echo "Test 3: Quantized qwen model (no lora). Starting to run llama runner at ${NOW}"
176+
# shellcheck source=/dev/null
177+
cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths="qwen_foundation_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
178+
NOW=$(date +"%H:%M:%S")
179+
echo "Finished at ${NOW}"
180+
RESULT=$(cat result.txt)
181+
if [[ "${RESULT}" == "${EXPECTED_QUANT_PREFIX}"* ]]; then
182+
echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
183+
echo "Actual result: ${RESULT}"
184+
echo "Test 3: Success"
185+
else
186+
echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
187+
echo "Actual result: ${RESULT}"
188+
echo "Test 3: Failure; results not the same"
189+
cleanup_files
190+
exit 1
191+
fi
192+
193+
# Run quantized lora adapter.
194+
NOW=$(date +"%H:%M:%S")
195+
echo "Test 4: Quantized, program-data separation lora. Starting to run llama runner at ${NOW}"
196+
# shellcheck source=/dev/null
197+
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths="qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
198+
NOW=$(date +"%H:%M:%S")
199+
echo "Finished at ${NOW}"
200+
201+
RESULT=$(cat result.txt)
202+
if [[ "${RESULT}" == "${EXPECTED_QUANT_LORA_PREFIX}"* ]]; then
203+
echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
204+
echo "Actual result: ${RESULT}"
205+
echo "Test 4: Success"
206+
else
207+
echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
208+
echo "Actual result: ${RESULT}"
209+
echo "Test 4: Failure; results not the same"
120210
cleanup_files
121211
exit 1
122212
fi
123213

214+
# Confirm qwen_foundation_q.ptd file size.
215+
FOUNDATION_Q_SIZE=$(stat -c%s qwen_foundation_q.ptd)
216+
if [[ $FOUNDATION_Q_SIZE -le "1000000000" ]]; then
217+
echo "qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE"
218+
else
219+
echo "qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
220+
cleanup_files
221+
exit 1
222+
fi
223+
124224
cleanup_files

0 commit comments

Comments
 (0)