Commit 51eff66

Final cleaned code to test
1 parent: a8ad535

12 files changed: +181, -826 lines

convert_hf_to_gguf.py

Lines changed: 1 addition & 3 deletions
@@ -643,7 +643,6 @@ def print_registered_models(cls):
     @classmethod
     def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type[ModelBase]:
         try:
-            print(cls._model_classes)
             return cls._model_classes[model_type][arch]
         except KeyError:
             raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
@@ -7979,7 +7978,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
         self.gguf_writer.add_embedding_length(n_embed)
         self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", self.hparams.get("intermediate_size", 4 * n_embed)))
-        self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams.get("num_hidden_layers", 0)))
+        self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams["num_hidden_layers"]))
         self.gguf_writer.add_head_count(n_head)
         self.gguf_writer.add_head_count_kv(n_head_kv)
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("layernorm_epsilon",1e-5))
@@ -10186,7 +10185,6 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
     # maybe we should fallback to text model's arch in that case, since not many models have both
     text_config = hparams.get("text_config", {})
     vision_config = hparams.get("vision_config", {})
-    print(hparams.get("architectures"))
    arch = None
    if (arches := hparams.get("architectures")) is not None and len(arches) > 0:
        arch = arches[0]

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 2 additions & 76 deletions
@@ -193,11 +193,6 @@ typedef pthread_t ggml_thread_t;
 #include <TargetConditionals.h>
 #endif
 
-#include <stdatomic.h>
-
-static _Atomic uint64_t ggml_op_us[GGML_OP_COUNT];
-static _Atomic uint64_t ggml_op_calls[GGML_OP_COUNT];
-
 static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = {
         .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
@@ -2869,44 +2864,6 @@ struct ggml_cplan ggml_graph_plan(
     return cplan;
 }
 
-// static thread_ret_t ggml_graph_compute_thread(void * data) {
-//     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
-//     struct ggml_threadpool * tp = state->threadpool;
-//
-//     const struct ggml_cgraph * cgraph = tp->cgraph;
-//     const struct ggml_cplan * cplan = tp->cplan;
-//
-//     set_numa_thread_affinity(state->ith);
-//
-//     struct ggml_compute_params params = {
-//         /*.ith =*/ state->ith,
-//         /*.nth =*/ atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed),
-//         /*.wsize =*/ cplan->work_size,
-//         /*.wdata =*/ cplan->work_data,
-//         /*.threadpool=*/ tp,
-//     };
-//
-//     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
-//         struct ggml_tensor * node = cgraph->nodes[node_n];
-//
-//         ggml_compute_forward(&params, node);
-//
-//         if (state->ith == 0 && cplan->abort_callback &&
-//             cplan->abort_callback(cplan->abort_callback_data)) {
-//             atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
-//             tp->ec = GGML_STATUS_ABORTED;
-//         }
-//
-//         if (node_n + 1 < cgraph->n_nodes) {
-//             ggml_barrier(state->threadpool);
-//         }
-//     }
-//
-//     ggml_barrier(state->threadpool);
-//
-//     return 0;
-// }
-
 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
     struct ggml_threadpool * tp = state->threadpool;
@@ -2927,25 +2884,21 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
         struct ggml_tensor * node = cgraph->nodes[node_n];
 
-        uint64_t t0 = ggml_time_us();
         ggml_compute_forward(&params, node);
-        uint64_t dt = ggml_time_us() - t0;
-
-        atomic_fetch_add_explicit(&ggml_op_us[node->op], dt, memory_order_relaxed);
-        atomic_fetch_add_explicit(&ggml_op_calls[node->op], 1, memory_order_relaxed);
 
         if (state->ith == 0 && cplan->abort_callback &&
             cplan->abort_callback(cplan->abort_callback_data)) {
             atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
             tp->ec = GGML_STATUS_ABORTED;
-        }
+        }
 
         if (node_n + 1 < cgraph->n_nodes) {
             ggml_barrier(state->threadpool);
         }
     }
 
     ggml_barrier(state->threadpool);
+
     return 0;
 }
 
@@ -3248,33 +3201,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         ggml_threadpool_free(threadpool);
     }
 
-    // printf("\n========= GGML OP PERF =========\n");
-    // for (int i = 0; i < GGML_OP_COUNT; i++) {
-    //     uint64_t us = atomic_load(&ggml_op_us[i]);
-    //     uint64_t calls = atomic_load(&ggml_op_calls[i]);
-    //     if (calls == 0) continue;
-    //
-    //     printf("%-16s : %8llu us %6llu calls avg %6llu us\n",
-    //         ggml_op_name(i),
-    //         (unsigned long long)us,
-    //         (unsigned long long)calls,
-    //         (unsigned long long)(us / calls));
-    // }
-    // printf("================================\n\n");
-
-    // printf("\n");
-    // for (int i = 0; i < GGML_OP_COUNT; i++) {
-    //     uint64_t us = atomic_load(&ggml_op_us[i]);
-    //     uint64_t calls = atomic_load(&ggml_op_calls[i]);
-    //     if (calls == 0) continue;
-    //
-    //     printf("%-16s,%8llu us,%6llu,%6llu us,",
-    //         ggml_op_name(i),
-    //         (unsigned long long)us,
-    //         (unsigned long long)calls,
-    //         (unsigned long long)(us / calls));
-    // }
-
     return ret;
 }
 
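For context, the instrumentation stripped out above follows a standard lock-free profiling pattern: one atomic time accumulator and one atomic call counter per op, bumped with relaxed atomics from every worker thread and read once after the graph has run. Below is a minimal standalone C++ sketch of that pattern, using std::atomic in place of C11 _Atomic; Op, kOpCount, now_us() and timed_forward() are illustrative stand-ins, not ggml's own names.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>

// Illustrative op enum standing in for ggml_op / GGML_OP_COUNT.
enum Op { OP_ADD, OP_MUL_MAT, OP_SOFT_MAX, kOpCount };

// One accumulator pair per op, shared by all worker threads.
static std::atomic<uint64_t> op_us[kOpCount];
static std::atomic<uint64_t> op_calls[kOpCount];

// Stand-in for ggml_time_us().
static uint64_t now_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

// Wraps one node's kernel, like the removed t0/dt code around ggml_compute_forward().
template <typename Fn>
void timed_forward(Op op, Fn && kernel) {
    const uint64_t t0 = now_us();
    kernel();
    const uint64_t dt = now_us() - t0;
    op_us[op].fetch_add(dt, std::memory_order_relaxed);   // total microseconds per op
    op_calls[op].fetch_add(1, std::memory_order_relaxed); // number of invocations per op
}

// Printed once at the end of the run, like the commented-out GGML OP PERF block.
void dump_op_perf() {
    for (int i = 0; i < kOpCount; i++) {
        const uint64_t us    = op_us[i].load(std::memory_order_relaxed);
        const uint64_t calls = op_calls[i].load(std::memory_order_relaxed);
        if (calls == 0) continue;
        printf("op %d : %llu us over %llu calls, avg %llu us\n",
               i, (unsigned long long) us, (unsigned long long) calls,
               (unsigned long long) (us / calls));
    }
}

Relaxed ordering is sufficient here because the counters are only aggregated after the worker threads have finished; no ordering with other memory operations is needed.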

gguf-py/gguf/constants.py

Lines changed: 0 additions & 18 deletions
@@ -356,7 +356,6 @@ class MODEL_ARCH(IntEnum):
     QWEN3VLMOE = auto()
     PHI2 = auto()
     PHI3 = auto()
-    PHI3_VISION = auto()
     PHIMOE = auto()
     PLAMO = auto()
     PLAMO2 = auto()
@@ -724,7 +723,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.QWEN3VLMOE: "qwen3vlmoe",
     MODEL_ARCH.PHI2: "phi2",
     MODEL_ARCH.PHI3: "phi3",
-    MODEL_ARCH.PHI3_VISION: "phi3_vision",
     MODEL_ARCH.PHIMOE: "phimoe",
     MODEL_ARCH.PLAMO: "plamo",
     MODEL_ARCH.PLAMO2: "plamo2",
@@ -1672,22 +1670,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.PHI3_VISION: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FACTORS_LONG,
-        MODEL_TENSOR.ROPE_FACTORS_SHORT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
     MODEL_ARCH.PHIMOE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

src/llama-arch.cpp

Lines changed: 0 additions & 1 deletion
@@ -36,7 +36,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN3VLMOE, "qwen3vlmoe" },
     { LLM_ARCH_PHI2, "phi2" },
     { LLM_ARCH_PHI3, "phi3" },
-    { LLM_ARCH_PHI3_VISION, "phi3_vision" },
     { LLM_ARCH_PHIMOE, "phimoe" },
     { LLM_ARCH_PLAMO, "plamo" },
     { LLM_ARCH_PLAMO2, "plamo2" },

src/llama-arch.h

Lines changed: 0 additions & 1 deletion
@@ -40,7 +40,6 @@ enum llm_arch {
     LLM_ARCH_QWEN3VLMOE,
     LLM_ARCH_PHI2,
     LLM_ARCH_PHI3,
-    LLM_ARCH_PHI3_VISION,
     LLM_ARCH_PHIMOE,
     LLM_ARCH_PLAMO,
     LLM_ARCH_PLAMO2,
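As the two hunks above show, an architecture is registered in two places on the C++ side: as an llm_arch enum value in llama-arch.h and as an entry in the LLM_ARCH_NAMES map in llama-arch.cpp, so adding or removing one means touching both files in lockstep. A trimmed-down sketch of that pairing, with simplified enum values and a hypothetical reverse-lookup helper rather than the actual llama.cpp declarations:

#include <map>
#include <string>

// Sketch of the enum half (llama-arch.h in the real tree).
enum llm_arch_sketch {
    ARCH_PHI2,
    ARCH_PHI3,
    ARCH_PHIMOE,
    ARCH_UNKNOWN,
};

// Sketch of the name-map half (llama-arch.cpp in the real tree).
static const std::map<llm_arch_sketch, const char *> ARCH_NAMES = {
    { ARCH_PHI2,   "phi2"   },
    { ARCH_PHI3,   "phi3"   },
    { ARCH_PHIMOE, "phimoe" },
};

// Hypothetical reverse lookup, e.g. for an architecture string read from a model header.
static llm_arch_sketch arch_from_string(const std::string & name) {
    for (const auto & kv : ARCH_NAMES) {
        if (name == kv.second) {
            return kv.first;
        }
    }
    return ARCH_UNKNOWN;
}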

src/llama-graph.h

Lines changed: 1 addition & 1 deletion
@@ -473,7 +473,7 @@ class llm_graph_result {
 
     virtual ~llm_graph_result() = default;
 
-    auto get_tokens() const -> ggml_tensor * { return t_tokens; }
+    ggml_tensor * get_tokens() const { return t_tokens; }
     ggml_tensor * get_logits() const { return t_logits; }
     ggml_tensor * get_embd() const { return t_embd; }
     ggml_tensor * get_embd_pooled() const { return t_embd_pooled; }
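The one-line change above is purely stylistic: the trailing-return-type declaration is rewritten in the leading-return-type form used by the neighbouring getters, and both spellings declare the same member function. A small self-contained illustration (the struct and member names here are placeholders, not llm_graph_result itself):

struct ggml_tensor; // opaque; only a pointer is needed for the illustration

struct graph_result_sketch {
    ggml_tensor * t_tokens = nullptr;

    // Equivalent declarations: the trailing return type (the old style in the diff) ...
    auto get_tokens_trailing() const -> ggml_tensor * { return t_tokens; }

    // ... and the leading form the commit switches to, matching the other getters.
    ggml_tensor * get_tokens_leading() const { return t_tokens; }
};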

tools/mtmd/clip-impl.h

Lines changed: 7 additions & 20 deletions
@@ -39,11 +39,10 @@
 #define KEY_FEATURE_LAYER "clip.vision.feature_layer"
 #define KEY_PROJ_SCALE_FACTOR "clip.vision.projector.scale_factor"
 #define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
-// [NEW] Phi-3-Vision Specific Keys
-#define KEY_PHI3_HD_ORDER "clip.vision.hd_transform_order" // Stores "sub_glb"
-#define KEY_PHI3_NUM_IMG_TOKENS "clip.vision.num_img_tokens" // Stores 144
-#define KEY_PHI3_USE_HD "clip.vision.use_hd_transform" // Stores true
-#define KEY_PHI3_WITH_SEP "clip.vision.with_learnable_separator" // Stores true
+#define KEY_PHI3_HD_ORDER "clip.vision.hd_transform_order"
+#define KEY_PHI3_NUM_IMG_TOKENS "clip.vision.num_img_tokens"
+#define KEY_PHI3_USE_HD "clip.vision.use_hd_transform"
+#define KEY_PHI3_WITH_SEP "clip.vision.with_learnable_separator"
 #define KEY_IS_DEEPSTACK_LAYERS "clip.vision.is_deepstack_layers"
 
 #define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
@@ -91,21 +90,9 @@
 #define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
 #define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
 #define TN_IMAGE_NEWLINE "model.image_newline"
-
-// [NEW] Phi-3-Vision Specific Tensors
-// Mapping for: model.vision_embed_tokens.glb_GN
-#define TN_PHI3_GLB_GN "v.glb_GN"
-// Mapping for: model.vision_embed_tokens.sub_GN
-#define TN_PHI3_SUB_GN "v.sub_GN"
-
-// [NEW] Projector Mapping
-// Your tensor map shows "model.vision_embed_tokens.img_projection.0.weight"
-// and "model.vision_embed_tokens.img_projection.2.weight".
-// This confirms it is a 2-layer MLP (Layer 0 = Linear, Layer 1 = GELU (implicit), Layer 2 = Linear).
-// We can reuse TN_LLAVA_PROJ ("mm.%d.%s") or define a specific one if the conversion script names them uniquely.
-// To be safe and specific:
-#define TN_PHI3_PROJ_MLP "mm.phi3_mlp.%d.%s"
-
+#define TN_PHI3_GLB_GN "v.glb_GN" // phi3v
+#define TN_PHI3_SUB_GN "v.sub_GN" // phi3v
+#define TN_PHI3_PROJ_MLP "mm.phi3_mlp.%d.%s" // phi3v
 #define TN_MM_INP_NORM "mm.input_norm.weight"
 #define TN_MM_INP_NORM_B "mm.input_norm.bias"
 #define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3
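The TN_* macros above are printf-style templates: concrete tensor names are produced by substituting a block index and a suffix such as "weight" or "bias" into the format string. A short illustration of how a template like TN_PHI3_PROJ_MLP expands; the tensor_name() helper here is hypothetical, not clip.cpp's actual formatting code:

#include <cstdio>
#include <string>

#define TN_PHI3_PROJ_MLP "mm.phi3_mlp.%d.%s" // same template as in the header above

// Hypothetical helper: expand a name template for one layer index and suffix.
static std::string tensor_name(const char * tmpl, int layer, const char * suffix) {
    char buf[256];
    snprintf(buf, sizeof(buf), tmpl, layer, suffix);
    return std::string(buf);
}

int main() {
    // Prints "mm.phi3_mlp.0.weight" and "mm.phi3_mlp.2.bias".
    printf("%s\n", tensor_name(TN_PHI3_PROJ_MLP, 0, "weight").c_str());
    printf("%s\n", tensor_name(TN_PHI3_PROJ_MLP, 2, "bias").c_str());
    return 0;
}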
