@@ -70,7 +70,7 @@ The answer is: 12<|im_end|>"
7070
7171# Run llama runner on single lora PTE file.
7272NOW=$( date +" %H:%M:%S" )
73- echo " Starting to run llama runner at ${NOW} "
73+ echo " Test 1: Single lora file. Starting to run llama runner at ${NOW} "
7474# shellcheck source=/dev/null
7575cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_full.pte --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result.txt
7676NOW=$( date +" %H:%M:%S" )
@@ -81,11 +81,11 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
8181 echo " Expected result prefix: ${EXPECTED_PREFIX} "
8282 echo " Actual result: ${RESULT} "
8383 # Do not clean up files if test passes, as they're re-used in the next test.
84- echo " Success"
84+ echo " Test 1: Success"
8585else
8686 echo " Expected result prefix: ${EXPECTED_PREFIX} "
8787 echo " Actual result: ${RESULT} "
88- echo " Failure; results not the same"
88+ echo " Test 1: Failure; results not the same"
8989 cleanup_files
9090 exit 1
9191fi
@@ -102,23 +102,123 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
102102
103103# Run llama runner on PTE, PTD files.
104104NOW=$( date +" %H:%M:%S" )
105- echo " Starting to run llama runner at ${NOW} "
105+ echo " Test 2: Program data separation lora. Starting to run llama runner at ${NOW} "
106106# shellcheck source=/dev/null
107107cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math.pte --data_paths=" qwen_foundation.ptd,qwen_lora_math.ptd" --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result2.txt
108108NOW=$( date +" %H:%M:%S" )
109109echo " Finished at ${NOW} "
110110
111- RESULT2 =$( cat result2 .txt)
112- if [[ " ${RESULT2 } " == " ${EXPECTED_PREFIX} " * ]]; then
111+ RESULT=$( cat result2.txt)  # Test 2's runner output above is redirected to result2.txt; result.txt still holds Test 1's output
112+ if [[ " ${RESULT } " == " ${EXPECTED_PREFIX} " * ]]; then
113113 echo " Expected result prefix: ${EXPECTED_PREFIX} "
114- echo " Actual result: ${RESULT2 } "
115- echo " Success"
114+ echo " Actual result: ${RESULT } "
115+ echo " Test 2: Success"
116116else
117117 echo " Expected result prefix: ${EXPECTED_PREFIX} "
118- echo " Actual result: ${RESULT2} "
119- echo " Failure; results not the same"
118+ echo " Actual result: ${RESULT} "
119+ echo " Test 2: Failure; results not the same"
120+ cleanup_files  # remove generated artifacts before exiting, matching the other failure branches
121+ exit 1
122+ fi
123+
124+ # Confirm the size of qwen_foundation.ptd (the foundation weights file loaded by Test 2) is at most 2.4GB.
125+ FOUNDATION_SIZE=$( stat -c%s qwen_foundation.ptd)
126+ if [[ $FOUNDATION_SIZE -le " 2400000000" ]]; then
127+ echo " qwen_foundation.ptd size is: $FOUNDATION_SIZE "
128+ else
129+ echo " qwen_foundation.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
130+ cleanup_files
131+ exit 1
132+ fi
133+
134+ # ## QUANTIZATION & PROGRAM DATA SEPARATION ###
135+ EXPECTED_QUANT_PREFIX=" <|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
136+ <think>
137+ Okay, so I need to calculate 15% of 80."
138+ EXPECTED_QUANT_LORA_PREFIX="
139+ <|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
140+ To calculate 15% of 80, we can multiply 80 by 15/100.
141+ So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
142+ #### 12
143+ The answer is: 12<|im_end|>"
144+
145+ # Export Quantized PTE, PTD file, no LoRA.
146+ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
147+ --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
148+ +export.output_name=" qwen_q.pte" \
149+ +export.foundation_weights_file=" qwen_foundation_q.ptd" \
150+ +quantization.qmode=" 8da4w" \
151+ +quantization.group_size=32
152+
153+ # Export Quantized LoRA PTE, LoRA PTD, foundation PTD file.
154+ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
155+ --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
156+ +base.adapter_checkpoint=" ${HF_ADAPTER_PATH} /adapter_model.safetensors" \
157+ +base.adapter_config=" ${HF_ADAPTER_PATH} /adapter_config.json" \
158+ +export.output_name=" qwen_lora_math_q.pte" \
159+ +export.foundation_weights_file=" qwen_foundation_lora_q.ptd" \
160+ +export.lora_weights_file=" qwen_lora_math_q.ptd" \
161+ +quantization.qmode=" 8da4w" \
162+ +quantization.group_size=32
163+
164+ # Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same.
165+ if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
166+ echo " qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
167+ else
168+ echo " qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
169+ cleanup_files
170+ exit 1
171+ fi
172+
173+ # Run quantized qwen model (no adapter).
174+ NOW=$( date +" %H:%M:%S" )
175+ echo " Test 3: Quantized qwen model (no lora). Starting to run llama runner at ${NOW} "
176+ # shellcheck source=/dev/null
177+ cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths=" qwen_foundation_q.ptd" --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result.txt
178+ NOW=$( date +" %H:%M:%S" )
179+ echo " Finished at ${NOW} "
180+ RESULT=$( cat result.txt)
181+ if [[ " ${RESULT} " == " ${EXPECTED_QUANT_PREFIX} " * ]]; then
182+ echo " Expected result prefix: ${EXPECTED_QUANT_PREFIX} "
183+ echo " Actual result: ${RESULT} "
184+ echo " Test 3: Success"
185+ else
186+ echo " Expected result prefix: ${EXPECTED_QUANT_PREFIX} "
187+ echo " Actual result: ${RESULT} "
188+ echo " Test 3: Failure; results not the same"
189+ cleanup_files
190+ exit 1
191+ fi
192+
193+ # Run quantized lora adapter.
194+ NOW=$( date +" %H:%M:%S" )
195+ echo " Test 4: Quantized, program-data separation lora. Starting to run llama runner at ${NOW} "
196+ # shellcheck source=/dev/null
197+ cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths=" qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result.txt
198+ NOW=$( date +" %H:%M:%S" )
199+ echo " Finished at ${NOW} "
200+
201+ RESULT=$( cat result.txt)
202+ if [[ " ${RESULT} " == " ${EXPECTED_QUANT_LORA_PREFIX} " * ]]; then
203+ echo " Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX} "
204+ echo " Actual result: ${RESULT} "
205+ echo " Test 4: Success"
206+ else
207+ echo " Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX} "
208+ echo " Actual result: ${RESULT} "
209+ echo " Test 4: Failure; results not the same"
120210 cleanup_files
121211 exit 1
122212fi
123213
214+ # Confirm qwen_foundation_q.ptd file size.
215+ FOUNDATION_Q_SIZE=$( stat -c%s qwen_foundation_q.ptd)
216+ if [[ $FOUNDATION_Q_SIZE -le " 1000000000" ]]; then
217+ echo " qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE "
218+ else
219+ echo " qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
220+ cleanup_files
221+ exit 1
222+ fi
223+
124224cleanup_files
0 commit comments