
Commit 8062559

Merge pull request #221 from menloresearch/update-dev-from-master-2025-08-29-00-12
Sync master with upstream release b6314
2 parents 335ad88 + b942ade


46 files changed: +1326 -406 lines

common/arg.cpp

Lines changed: 19 additions & 3 deletions
@@ -1106,7 +1106,7 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
     printf("\"\n\n");
 
     printf("    case \"$prev\" in\n");
-    printf("        --model)\n");
+    printf("        --model|-m)\n");
     printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
     printf("            return 0\n");
     printf("            ;;\n");
@@ -2555,15 +2555,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
+            params.lora_adapters.push_back({ std::string(value), 1.0, "", "", nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(common_arg(
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
+            params.lora_adapters.push_back({ fname, std::stof(scale), "", "", nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
@@ -3538,6 +3538,22 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
+    add_opt(common_arg(
+        {"--fim-qwen-30b-default"},
+        string_format("use default Qwen 3 Coder 30B A3B Instruct (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF";
+            params.model.hf_file = "qwen3-coder-30b-a3b-instruct-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
     add_opt(common_arg(
         { "--diffusion-steps" }, "N",
         string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
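
The new --fim-qwen-30b-default entry is a server preset: one flag that pins the model download, port, and batching defaults shown in the hunk above. A minimal usage sketch, assuming a local llama-server build (the long-form equivalents of the preset fields are taken from common/arg.cpp; exact spellings may differ by build):

    # preset added by this sync (fetches the Q8_0 weights from ggml-org on first use)
    ./build/bin/llama-server --fim-qwen-30b-default

    # roughly equivalent, spelled out by hand
    ./build/bin/llama-server \
        -hf ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF \
        --hf-file qwen3-coder-30b-a3b-instruct-q8_0.gguf \
        --port 8012 --n-gpu-layers 99 --flash-attn \
        --ubatch-size 1024 --batch-size 1024 --ctx-size 0 --cache-reuse 256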

common/common.cpp

Lines changed: 5 additions & 0 deletions
@@ -988,7 +988,12 @@ struct common_init_result common_init_from_params(common_params & params) {
             return iparams;
         }
 
+        char buf[1024];
         la.ptr = lora.get();
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
+        la.task_name = buf;
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
+        la.prompt_prefix = buf;
         iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
     }
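
Here la is the common_adapter_lora_info entry being filled in, so every LoRA loaded through common_init_from_params now carries the task name and prompt prefix stored in the adapter's GGUF metadata. A hedged sketch of exercising this path (file names are placeholders, not from this diff):

    # load a per-task adapter produced by convert_hf_to_gguf.py (see below)
    ./build/bin/llama-server -m jina-embeddings-v3-f16.gguf \
        --lora lora-retrieval.query-jina-embeddings-v3-f16.gguf --embedding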

common/common.h

Lines changed: 3 additions & 0 deletions
@@ -34,6 +34,9 @@ struct common_adapter_lora_info {
     std::string path;
     float scale;
 
+    std::string task_name;
+    std::string prompt_prefix;
+
     struct llama_adapter_lora * ptr;
 };
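
The two new fields are populated from the "adapter.lora.task_name" and "adapter.lora.prompt_prefix" metadata keys read in the common/common.cpp hunk above. One way to confirm an adapter file carries those keys, assuming the gguf-py dump script at its usual location and a placeholder file name:

    python gguf-py/gguf/scripts/gguf_dump.py --no-tensors \
        lora-retrieval.query-jina-embeddings-v3-f16.gguf | \
        grep -E 'adapter\.lora\.(task_name|prompt_prefix)'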

convert_hf_to_gguf.py

Lines changed: 77 additions & 2 deletions
@@ -72,6 +72,7 @@ class ModelBase:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    dry_run: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -111,6 +112,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager or (remote_hf_model_id is not None)
+        self.dry_run = dry_run
         self.remote_hf_model_id = remote_hf_model_id
         if remote_hf_model_id is not None:
             self.is_safetensors = True
@@ -4871,11 +4873,35 @@ def modify_tensors(self, data_torch, name, bid):
 @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
+    _lora_files = {}
+    _lora_names = []
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model, False)
+
+        if lora_names := hparams.get("lora_adaptations"):
+            self._lora_names = lora_names
+            self.model_arch = gguf.MODEL_ARCH.JINA_BERT_V3
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
         self._xlmroberta_tokenizer_init()
 
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if self._lora_names:
+            for name in self._lora_names:
+                fname = self.add_prefix_to_filename(self.fname_out, f"lora-{name}-")
+                self._lora_files[name] = gguf.GGUFWriter(fname, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, dry_run=self.dry_run)
+
+        return super().generate_extra_tensors()
+
+    def set_type(self):
+        for lora_writer in self._lora_files.values():
+            lora_writer.add_type(gguf.GGUFType.ADAPTER)
+            lora_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        super().set_type()
+
     def set_vocab(self):
         self._xlmroberta_set_vocab()
@@ -4885,13 +4911,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("roberta."):
             name = name[8:]
 
+        # jina-embeddings-v3
+        if ".parametrizations." in name:
+            name = name.replace(".parametrizations.", ".")
+            if name.endswith(".original"):
+                name = name[:-9]
+
         # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
         if name == "embeddings.position_embeddings.weight":
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
+        if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
+            if name.startswith("pooler.dense"):
+                return []
+
+            num_loras = data_torch.size(0)
+            assert num_loras == len(self._lora_names)
+
+            # Split out each LoRA in their own GGUF
+            for i, lora_writer in enumerate(self._lora_files.values()):
+                new_name = self.map_tensor_name(name[:-9]) + name[-7:].lower()
+                data = data_torch[i, :, :]
+                # Transpose/flip token_embd/types into correct shape
+                if new_name == "token_embd.weight.lora_b":
+                    data = data.T
+                elif new_name.startswith("token_types.weight."):
+                    new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
+                lora_writer.add_tensor(new_name, data.float().numpy(), raw_dtype=gguf.GGMLQuantizationType.F32)
+
+            return []
+
         return super().modify_tensors(data_torch, name, bid)
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # jina-embeddings-v3
+        if rotary_emb_base := self.hparams.get("rotary_emb_base"):
+            self.gguf_writer.add_rope_freq_base(rotary_emb_base)
+        lora_alpha = self.hparams.get("lora_alpha")
+        if lora_prompt_prefixes := self.hparams.get("task_instructions"):
+            assert self._lora_files and all(lora_name in lora_prompt_prefixes for lora_name in self._lora_files.keys())
+        for lora_name, lora_writer in self._lora_files.items():
+            lora_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, lora_alpha if lora_alpha is not None else 1.0)
+            lora_writer.add_string(gguf.Keys.Adapter.LORA_TASK_NAME, lora_name)
+            if lora_prompt_prefixes:
+                lora_writer.add_string(gguf.Keys.Adapter.LORA_PROMPT_PREFIX, lora_prompt_prefixes[lora_name])
+
+    def write(self):
+        super().write()
+        for lora_writer in self._lora_files.values():
+            lora_writer.write_header_to_file()
+            lora_writer.write_kv_data_to_file()
+            lora_writer.write_tensors_to_file(progress=True)
+            lora_writer.close()
+
 
 @ModelBase.register("GemmaForCausalLM")
 class GemmaModel(TextModel):
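
Taken together, XLMRobertaModel now detects the lora_adaptations hparam, switches the arch to JINA_BERT_V3, and splits each stacked LoRA tensor into its own adapter GGUF prefixed with lora-<task>-. A sketch of what a conversion run could look like (paths and the example task name are illustrative, assuming a local jina-embeddings-v3 checkout):

    # base model conversion; per-task adapter GGUFs are written alongside the outfile
    python convert_hf_to_gguf.py ./jina-embeddings-v3 \
        --outfile models/jina-embeddings-v3-f16.gguf --outtype f16

    # expected extra outputs, one per entry in hparams["lora_adaptations"], e.g.
    #   models/lora-retrieval.query-jina-embeddings-v3-f16.gguf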

examples/eval-callback/eval-callback.cpp

Lines changed: 32 additions & 18 deletions
@@ -28,9 +28,40 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
     return str;
 }
 
+static float ggml_get_float_value(uint8_t * data, ggml_type type, const size_t * nb, size_t i0, size_t i1, size_t i2, size_t i3) {
+    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+    float v;
+    if (type == GGML_TYPE_F16) {
+        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+    } else if (type == GGML_TYPE_F32) {
+        v = *(float *) &data[i];
+    } else if (type == GGML_TYPE_I64) {
+        v = (float) *(int64_t *) &data[i];
+    } else if (type == GGML_TYPE_I32) {
+        v = (float) *(int32_t *) &data[i];
+    } else if (type == GGML_TYPE_I16) {
+        v = (float) *(int16_t *) &data[i];
+    } else if (type == GGML_TYPE_I8) {
+        v = (float) *(int8_t *) &data[i];
+    } else {
+        GGML_ABORT("fatal error");
+    }
+    return v;
+}
+
 static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
     GGML_ASSERT(n > 0);
     float sum = 0;
+    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    const float v = ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
+                    sum += v;
+                }
+            }
+        }
+    }
     for (int64_t i3 = 0; i3 < ne[3]; i3++) {
         LOG(" [\n");
         for (int64_t i2 = 0; i2 < ne[2]; i2++) {
@@ -50,25 +81,8 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
                         LOG("..., ");
                         i0 = ne[0] - n;
                     }
-                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
-                    float v;
-                    if (type == GGML_TYPE_F16) {
-                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
-                    } else if (type == GGML_TYPE_F32) {
-                        v = *(float *) &data[i];
-                    } else if (type == GGML_TYPE_I64) {
-                        v = (float) *(int64_t *) &data[i];
-                    } else if (type == GGML_TYPE_I32) {
-                        v = (float) *(int32_t *) &data[i];
-                    } else if (type == GGML_TYPE_I16) {
-                        v = (float) *(int16_t *) &data[i];
-                    } else if (type == GGML_TYPE_I8) {
-                        v = (float) *(int8_t *) &data[i];
-                    } else {
-                        GGML_ABORT("fatal error");
-                    }
+                    const float v = ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
                     LOG("%12.4f", v);
-                    sum += v;
                     if (i0 < ne[0] - 1) LOG(", ");
                 }
                 LOG("],\n");
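
Net effect of the two hunks: the per-element conversion is factored out into ggml_get_float_value, and sum is now accumulated over every element of the tensor up front, instead of over only the truncated set of values that get printed, so it no longer depends on the print width n. A run sketch, assuming the usual example binary name and a placeholder model:

    ./build/bin/llama-eval-callback -m models/some-model.gguf -p "hello" -n 1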

examples/model-conversion/Makefile

Lines changed: 14 additions & 0 deletions
@@ -37,6 +37,20 @@ causal-convert-model:
 	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
 	./scripts/causal/convert-model.sh
 
+causal-convert-mm-model-bf16: OUTTYPE=bf16
+causal-convert-mm-model-bf16: MM_OUTTYPE=f16
+causal-convert-mm-model-bf16: causal-convert-mm-model
+
+causal-convert-mm-model:
+	$(call validate_model_path,causal-convert-mm-model)
+	@MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
+	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
+	./scripts/causal/convert-model.sh
+
+	@MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(MM_OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
+	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
+	./scripts/causal/convert-model.sh --mmproj
+
 causal-run-original-model:
 	$(call validate_model_path,causal-run-original-model)
 	@MODEL_PATH="$(MODEL_PATH)" ./scripts/causal/run-org-model.py
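
Usage sketch for the new targets (MODEL_PATH is a placeholder; the bf16 variant simply re-invokes causal-convert-mm-model with OUTTYPE=bf16 and MM_OUTTYPE=f16):

    # convert the causal model, then its multimodal projector via --mmproj
    make causal-convert-mm-model MODEL_PATH=/path/to/model

    # bf16 main model with an f16 mmproj
    make causal-convert-mm-model-bf16 MODEL_PATH=/path/to/model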
examples/model-conversion/scripts/causal/convert-model.sh

Lines changed: 29 additions & 5 deletions
@@ -1,5 +1,21 @@
 #!/bin/bash
 
+set -e
+
+# Parse command line arguments
+MMPROJ=""
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --mmproj)
+            MMPROJ="--mmproj"
+            shift
+            ;;
+        *)
+            shift
+            ;;
+    esac
+done
+
 MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}"
 OUTPUT_DIR="${OUTPUT_DIR:-../../models}"
 TYPE="${OUTTYPE:-f16}"
@@ -11,12 +27,20 @@ echo "Model name: ${MODEL_NAME}"
 echo "Data type: ${TYPE}"
 echo "Converted model path:: ${CONVERTED_MODEL}"
 echo "Metadata override: ${METADATA_OVERRIDE}"
-python ../../convert_hf_to_gguf.py --verbose \
-    ${MODEL_PATH} \
-    --outfile ${CONVERTED_MODEL} \
-    --outtype ${TYPE} \
-    --metadata "${METADATA_OVERRIDE}"
+
+CMD_ARGS=("python" "../../convert_hf_to_gguf.py" "--verbose")
+CMD_ARGS+=("${MODEL_PATH}")
+CMD_ARGS+=("--outfile" "${CONVERTED_MODEL}")
+CMD_ARGS+=("--outtype" "${TYPE}")
+[[ -n "$METADATA_OVERRIDE" ]] && CMD_ARGS+=("--metadata" "${METADATA_OVERRIDE}")
+[[ -n "$MMPROJ" ]] && CMD_ARGS+=("${MMPROJ}")
+
+"${CMD_ARGS[@]}"
 
 echo ""
 echo "The environment variable CONVERTED_MODEL can be set to this path using:"
 echo "export CONVERTED_MODEL=$(realpath ${CONVERTED_MODEL})"
+if [[ -n "$MMPROJ" ]]; then
+    mmproj_file="${OUTPUT_DIR}/mmproj-$(basename "${CONVERTED_MODEL}")"
+    echo "The mmproj model was created in $(realpath "$mmproj_file")"
+fi
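
Since the script now parses its own arguments and assembles the converter invocation as an array, it can also be driven directly; a hedged example with placeholder paths:

    MODEL_PATH=/path/to/model OUTTYPE=f16 ./scripts/causal/convert-model.sh --mmproj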
