@@ -53,11 +53,7 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
# Download the Qwen3-0.6B snapshot from the Hugging Face hub and record the
# local directory that snapshot_download() prints.
HF_QWEN_PATH=$(python -c "from huggingface_hub import snapshot_download; print(snapshot_download('unsloth/Qwen3-0.6B'))")
echo "Model downloaded to: $HF_QWEN_PATH"

### BUILD LLAMA RUNNER.
# Both helpers are defined outside this section; the runner binary they
# produce is invoked below as cmake-out/examples/models/llama/llama_main.
cmake_install_executorch_libraries
cmake_build_llama_runner
6359
@@ -74,7 +70,7 @@ The answer is: 12<|im_end|>"
7470
# Test 1: run llama runner on the single, self-contained lora PTE file.
# The runner's stdout is captured in result.txt for the prefix check that
# follows.
NOW=$(date +"%H:%M:%S")
echo "Test 1: Single lora file. Starting to run llama runner at ${NOW}"
# RUNTIME_ARGS is left unquoted; presumably it holds several flags that must
# word-split into separate arguments -- confirm against its definition.
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_full.pte --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
@@ -85,11 +81,11 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
8581 echo " Expected result prefix: ${EXPECTED_PREFIX} "
8682 echo " Actual result: ${RESULT} "
8783 # Do not clean up files if test passes, as they're re-used in the next test.
88- echo " Success"
84+ echo " Test 1: Success"
8985else
9086 echo " Expected result prefix: ${EXPECTED_PREFIX} "
9187 echo " Actual result: ${RESULT} "
92- echo " Failure; results not the same"
88+ echo " Test 1: Failure; results not the same"
9389 cleanup_files
9490 exit 1
9591fi
@@ -106,23 +102,123 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
106102
# Test 2: run llama runner on the lora program (PTE) with the foundation and
# lora weights supplied as separate data (PTD) files.
NOW=$(date +"%H:%M:%S")
echo "Test 2: Program data separation lora. Starting to run llama runner at ${NOW}"
# RUNTIME_ARGS is left unquoted; presumably it holds several flags that must
# word-split into separate arguments -- confirm against its definition.
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math.pte --data_paths="qwen_foundation.ptd,qwen_lora_math.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

# Verify the output of THIS run. The runner above writes result2.txt, so that
# is the file that must be read; result.txt still holds Test 1's output and
# checking it would make this test pass without exercising the PTD path.
RESULT=$(<result2.txt)
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 2: Success"
else
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 2: Failure; results not the same"
  # Clean up on failure, matching the failure branches of the other tests.
  cleanup_files
  exit 1
fi
123+
# Confirm the size of the unquantized foundation weights file.
# `stat -c%s` (GNU coreutils syntax) prints the file size in bytes. Its exit
# status is checked explicitly: an empty value would compare as 0 under -le
# and let a missing file pass the threshold check.
if ! FOUNDATION_SIZE=$(stat -c%s qwen_foundation.ptd); then
  echo "Failed to read size of qwen_foundation.ptd"
  cleanup_files
  exit 1
fi
if [[ "${FOUNDATION_SIZE}" -le 2400000000 ]]; then
  echo "qwen_foundation.ptd size is: $FOUNDATION_SIZE"
else
  echo "qwen_foundation.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
  cleanup_files
  exit 1
fi
133+
### QUANTIZATION & PROGRAM DATA SEPARATION ###
# Expected output prefix for the quantized model run WITHOUT a lora adapter
# (compared against the runner output in Test 3).
EXPECTED_QUANT_PREFIX="<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
<think>
Okay, so I need to calculate 15% of 80."
# Expected output prefix for the quantized model run WITH the lora adapter
# (compared against the runner output in Test 4). The value intentionally
# starts with a newline.
EXPECTED_QUANT_LORA_PREFIX="
<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
To calculate 15% of 80, we can multiply 80 by 15/100.
So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
#### 12
The answer is: 12<|im_end|>"
144+
# Export Quantized PTE, PTD file, no LoRA.
# Produces qwen_q.pte plus the foundation weights in qwen_foundation_q.ptd,
# quantized with qmode 8da4w and group size 32.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
  +export.output_name="qwen_q.pte" \
  +export.foundation_weights_file="qwen_foundation_q.ptd" \
  +quantization.qmode="8da4w" \
  +quantization.group_size=32

# Export Quantized LoRA PTE, LoRA PTD, foundation PTD file.
# Same quantization settings as above, with the adapter checkpoint/config
# taken from HF_ADAPTER_PATH (set outside this section). The foundation
# weights go to a separate file (qwen_foundation_lora_q.ptd) so they can be
# compared with the no-LoRA export afterwards.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
  +base.adapter_checkpoint="${HF_ADAPTER_PATH}/adapter_model.safetensors" \
  +base.adapter_config="${HF_ADAPTER_PATH}/adapter_config.json" \
  +export.output_name="qwen_lora_math_q.pte" \
  +export.foundation_weights_file="qwen_foundation_lora_q.ptd" \
  +export.lora_weights_file="qwen_lora_math_q.ptd" \
  +quantization.qmode="8da4w" \
  +quantization.group_size=32
163+
# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same.
# `diff -q` only reports whether the files differ; its output is discarded and
# the exit status alone drives the branch. Test 4 below relies on this check:
# it runs the lora program against qwen_foundation_q.ptd.
if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
else
  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
  cleanup_files
  exit 1
fi
172+
# Test 3: run the quantized qwen model (no adapter) and compare the start of
# its output with EXPECTED_QUANT_PREFIX.
NOW=$(date +"%H:%M:%S")
echo "Test 3: Quantized qwen model (no lora). Starting to run llama runner at ${NOW}"
# RUNTIME_ARGS is left unquoted; presumably it holds several flags that must
# word-split into separate arguments -- confirm against its definition.
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths="qwen_foundation_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"
RESULT=$(cat result.txt)
# Prefix match: the trailing unquoted * lets any text follow the expected
# prefix.
if [[ "${RESULT}" == "${EXPECTED_QUANT_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 3: Success"
else
  echo "Expected result prefix: ${EXPECTED_QUANT_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 3: Failure; results not the same"
  cleanup_files
  exit 1
fi
192+
# Test 4: run the quantized lora program with the shared quantized foundation
# weights plus the quantized lora weights, and compare the start of its
# output with EXPECTED_QUANT_LORA_PREFIX.
NOW=$(date +"%H:%M:%S")
echo "Test 4: Quantized, program-data separation lora. Starting to run llama runner at ${NOW}"
# RUNTIME_ARGS is left unquoted; presumably it holds several flags that must
# word-split into separate arguments -- confirm against its definition.
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths="qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
# Prefix match: the trailing unquoted * lets any text follow the expected
# prefix.
if [[ "${RESULT}" == "${EXPECTED_QUANT_LORA_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 4: Success"
else
  echo "Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Test 4: Failure; results not the same"
  cleanup_files
  exit 1
fi
127213
# Confirm qwen_foundation_q.ptd file size.
# `stat -c%s` (GNU coreutils syntax) prints the file size in bytes. Its exit
# status is checked explicitly: an empty value would compare as 0 under -le
# and let a missing file pass the threshold check.
if ! FOUNDATION_Q_SIZE=$(stat -c%s qwen_foundation_q.ptd); then
  echo "Failed to read size of qwen_foundation_q.ptd"
  cleanup_files
  exit 1
fi
if [[ "${FOUNDATION_Q_SIZE}" -le 1000000000 ]]; then
  echo "qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE"
else
  echo "qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
  cleanup_files
  exit 1
fi

# All tests passed: remove the generated artifacts.
cleanup_files
0 commit comments