@@ -193,6 +193,11 @@ struct clip_hparams {
193193 int32_t attn_window_size = 0 ;
194194 int32_t n_wa_pattern = 0 ;
195195
196+ // deepseek-ocr (sam): layer count, head count and embedding size read by build_sam
197+ int32_t sam_n_layer = 0 ;
198+ int32_t sam_n_head = 0 ;
199+ int32_t sam_n_embd = 0 ;
200+
196201 // audio
197202 int32_t n_mel_bins = 0 ; // whisper preprocessor
198203 int32_t proj_stack_factor = 0 ; // ultravox
@@ -2676,9 +2681,9 @@ struct clip_graph {
26762681 }
26772682
26782683 ggml_tensor * build_sam (ggml_tensor * inp_raw) {
2679- const int n_embd = 768 ;
2680- const int _depth = 12 ;
2681- const int n_heads = 12 ;
2684+ const int n_embd = hparams. sam_n_embd ;
2685+ const int n_layer = hparams. sam_n_layer ;
2686+ const int n_heads = hparams. sam_n_head ;
26822687 const int d_heads = n_embd / n_heads;
26832688 const int window = hparams.attn_window_size ;
26842689
@@ -2721,7 +2726,7 @@ struct clip_graph {
27212726 }
27222727
27232728 // loop over layers
2724- for (int il = 0 ; il < _depth ; il++) {
2729+ for (int il = 0 ; il < n_layer ; il++) {
27252730 auto & layer = model.sam_layers [il];
27262731 ggml_tensor * shortcut = cur;
27272732
@@ -3286,6 +3291,10 @@ struct clip_model_loader {
32863291 hparams.patch_size = 16 ;
32873292 hparams.image_size = 1024 ;
32883293 hparams.warmup_image_size = 1024 ;
3294+
3295+ get_u32 (KEY_SAM_N_BLOCK, hparams.sam_n_layer , true );
3296+ get_u32 (KEY_SAM_N_HEAD, hparams.sam_n_head , true );
3297+ get_u32 (KEY_SAM_N_EMBD, hparams.sam_n_embd , true );
32893298 get_u32 (KEY_ATTN_WINDOW_SIZE, hparams.attn_window_size , true );
32903299 } break ;
32913300 default :
0 commit comments