# Download the Qwen3-0.6B foundation model from Hugging Face.
# snapshot_download returns the local cache path of the downloaded snapshot.
HF_QWEN_PATH=$(python -c "from huggingface_hub import snapshot_download; print(snapshot_download('unsloth/Qwen3-0.6B'))")
echo "Model downloaded to: ${HF_QWEN_PATH}"

### BUILD LLAMA RUNNER ###
# Helper functions defined earlier in this script (not visible in this chunk).
cmake_install_executorch_libraries
cmake_build_llama_runner

9591fi
9692
9793# ## PROGRAM DATA SEPARATION ###
98- # Export LoRA PTE, LoRA PTD, foundation PTD file.
94+ Export LoRA PTE, LoRA PTD, foundation PTD file.
9995$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
10096 --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
10197 +base.adapter_checkpoint=" ${HF_ADAPTER_PATH} /adapter_model.safetensors" \
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

# Compare the runner output against the expected prefix.
RESULT=$(cat result.txt)
# Both branches report the expected prefix and actual result; only the
# verdict differs, so print the shared lines once.
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Success"
else
  echo "Failure; results not the same"
  cleanup_files
  exit 1
fi

### QUANTIZATION & PROGRAM DATA SEPARATION ###
# Expected output of the quantized base model (no adapter): the stock Qwen3
# chain-of-thought ("<think>") style.
EXPECTED_QUANT_PREFIX="
<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
<think>
Okay, so I need to calculate 15% of 80.
"
# Expected output of the quantized model with the math LoRA adapter: the
# GSM8K-style fine-tuned answer format ("#### <answer>").
EXPECTED_QUANT_LORA_PREFIX="<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
To calculate 15% of 80, we can multiply 80 by 15/100.
So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
#### 12
The answer is: 12<|im_end|>
"

# Export quantized PTE + foundation PTD file, no LoRA.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
  +export.output_name="qwen_q.pte" \
  +export.foundation_weights_file="qwen_foundation_q.ptd" \
  +quantization.qmode="8da4w" \
  +quantization.group_size=32

# Export quantized LoRA PTE, LoRA PTD, foundation PTD file.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
  +base.adapter_checkpoint="${HF_ADAPTER_PATH}/adapter_model.safetensors" \
  +base.adapter_config="${HF_ADAPTER_PATH}/adapter_config.json" \
  +export.output_name="qwen_lora_math_q.pte" \
  +export.foundation_weights_file="qwen_foundation_lora_q.ptd" \
  +export.lora_weights_file="qwen_lora_math_q.ptd" \
  +quantization.qmode="8da4w" \
  +quantization.group_size=32

# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the
# same: foundation weights must not change when an adapter is attached.
if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
else
  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
  # Clean up on failure, consistent with every other failure path.
  cleanup_files
  exit 1
fi

# Run quantized qwen model (no adapter).
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths="qwen_foundation_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
# The no-adapter run must match the BASE-model prefix (the original compared
# it against the LoRA prefix — the two checks were swapped).
if [[ "${RESULT}" == "${EXPECTED_QUANT_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Success"
else
  echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Failure; results not the same"
  cleanup_files
  exit 1
fi

# Run quantized lora adapter.
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths="qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
# The adapter run must match the LoRA fine-tuned prefix.
if [[ "${RESULT}" == "${EXPECTED_QUANT_LORA_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Success"
else
  echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Failure; results not the same"
  cleanup_files
  exit 1
fi
cleanup_files