Commit 8abecfa

Merge tag 'b7207' into dev-fix-rope
2 parents: 0376146 + beb1f0c

85 files changed: +9162, -5070 lines


ci/run.sh

Lines changed: 8 additions & 8 deletions
@@ -45,7 +45,7 @@ sd=`dirname $0`
 cd $sd/../
 SRC=`pwd`
 
-CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON"
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON"
 
 if [ ! -z ${GG_BUILD_METAL} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
@@ -428,10 +428,10 @@ function gg_run_qwen3_0_6b {
 
     (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
 
-    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
 
 function check_ppl {
     qnt="$1"
@@ -523,8 +523,8 @@ function gg_run_embd_bge_small {
 
     ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
 
-    (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+    (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+    (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
 
     set +e
 }
@@ -564,7 +564,7 @@ function gg_run_rerank_tiny {
     model_f16="${path_models}/ggml-model-f16.gguf"
 
     # for this model, the SEP token is "</s>"
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --no-op-offload --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029
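
The ci/run.sh changes are two-fold: the build gains -DGGML_SCHED_NO_REALLOC=ON in CMAKE_EXTRA, and several partially offloaded runs gain --no-op-offload. A minimal sketch of exercising both outside the CI harness, assuming a llama.cpp checkout; the build directory and the model path ./models/model-q4_0.gguf are illustrative placeholders, not taken from this commit:

    # configure and build with the scheduler option the CI now sets
    cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON
    cmake --build build -j

    # run one of the updated checks: partial offload (-ngl 10) without op offload
    ./build/bin/llama-save-load-state --model ./models/model-q4_0.gguf -ngl 10 -c 1024 -fa off --no-op-offload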

common/arg.cpp

Lines changed: 2 additions & 2 deletions
@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.kv_unified = true;
         }
-    ).set_env("LLAMA_ARG_KV_SPLIT"));
+    ).set_env("LLAMA_ARG_KV_UNIFIED"));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -2639,7 +2639,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params &, const std::string & value) {
             common_log_set_file(common_log_main(), value.c_str());
         }
-    ));
+    ).set_env("LLAMA_LOG_FILE"));
     add_opt(common_arg(
         {"--log-colors"}, "[on|off|auto]",
         "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
