Commit 6b0e7cd

Merge pull request #7 from bluebread/sf/deepseek-ocr
First DeepSeek-OCR working implementation
2 parents ed3b7f1 + c5f4c64 commit 6b0e7cd

11 files changed: +498 -233 lines changed
common/arg.cpp

Lines changed: 15 additions & 0 deletions
@@ -1824,6 +1824,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.image_max_tokens = value;
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS"));
+    add_opt(common_arg(
+        {"--dsocr-mode"}, "MODE",
+        "DeepSeek-OCR resolution mode, one of:\n"
+        "- auto (default): automatically select resolution\n"
+        "- tiny, small, base, large: native resolution\n"
+        "- gundam, gundam-master: dynamic resolution",
+        [](common_params & params, const std::string & value) {
+            if (value == "auto" || value == "tiny" || value == "small" || value == "base" ||
+                value == "large" || value == "gundam" || value == "gundam-master") {
+                params.dsocr_mode = value;
+            } else {
+                throw std::invalid_argument("invalid value");
+            }
+        }
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_DSOCR_MODE"));
     if (llama_supports_rpc()) {
         add_opt(common_arg(
             {"--rpc"}, "SERVERS",

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -433,6 +433,7 @@ struct common_params {
     std::vector<std::string> image; // path to image file(s)
     int image_min_tokens = -1;
     int image_max_tokens = -1;
+    std::string dsocr_mode = "auto"; // DeepSeek-OCR resolution mode: auto, tiny, small, base, large, gundam, gundam-master

     // finetune
     struct lr_opt lr;

convert_hf_to_gguf.py

Lines changed: 7 additions & 5 deletions
@@ -6013,12 +6013,14 @@ def get_vision_config(self) -> dict[str, Any]:


     def tensor_force_quant(self, name, new_name, bid, n_dims):
+        # TODO: increase numerical stability. maybe delete later.
+        return gguf.GGMLQuantizationType.F32
         # related to https://github.com/ggml-org/llama.cpp/issues/13025
-        if "input_projection" in name:
-            return gguf.GGMLQuantizationType.F16
-        if ".embeddings." in name:
-            return gguf.GGMLQuantizationType.F32
-        return super().tensor_force_quant(name, new_name, bid, n_dims)
+        # if "input_projection" in name:
+        #     return gguf.GGMLQuantizationType.F16
+        # if ".embeddings." in name:
+        #     return gguf.GGMLQuantizationType.F32
+        # return super().tensor_force_quant(name, new_name, bid, n_dims)

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Only process vision-related tensors, skip language model tensors

ggml/src/ggml-cuda/upscale.cu

Lines changed: 2 additions & 0 deletions
@@ -214,5 +214,7 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
         upscale_f32_bicubic_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
                                  src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
                                  sf0, sf1, sf2, sf3, pixel_offset, stream);
+    } else {
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml.c

Lines changed: 1 addition & 0 deletions
@@ -5204,6 +5204,7 @@ struct ggml_tensor * ggml_flash_attn_ext(
     GGML_ASSERT(q->ne[3] == v->ne[3]);

     if (mask) {
+        GGML_ASSERT(mask->type == GGML_TYPE_F16);
         GGML_ASSERT(ggml_is_contiguous(mask));
         GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
                     "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");

tools/mtmd/clip-impl.h

Lines changed: 181 additions & 1 deletion
@@ -5,6 +5,7 @@
 #include <climits>
 #include <cstdarg>
 #include <cinttypes>
+#include <cstring>
 #include <string>
 #include <map>
 #include <sstream>
@@ -442,6 +443,33 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
 // debugging
 //

+
+static std::string to_ne_string(const ggml_tensor * t) {
+    std::string str;
+    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
+        str += std::to_string(t->ne[i]);
+        if (i + 1 < GGML_MAX_DIMS) {
+            str += ", ";
+        }
+    }
+    return str;
+}
+
+static void print_tensor_info(ggml_tensor * t) {
+    const struct ggml_tensor * src0 = t->src[0];
+    const struct ggml_tensor * src1 = t->src[1];
+
+    char src1_str[128] = {0};
+    if (src1) {
+        snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, to_ne_string(src1).c_str());
+    }
+
+    printf("%s: %s = %s(%s{%s}, %s)\n",
+        t->name, ggml_type_name(t->type), ggml_op_desc(t),
+        src0->name, to_ne_string(src0).c_str(),
+        src1 ? src1_str : "");
+}
+
 static void print_tensor_shape(ggml_tensor * t) {
     printf("%s.shape = [", t->name);
     for (int i = 0; i < ggml_n_dims(t); ++i) {
@@ -453,12 +481,50 @@ static void print_tensor_shape(ggml_tensor * t) {
     printf("]\n");
 }

+static void print_tensor_sum(ggml_tensor * t, uint8_t * data, int64_t n) {
+    (void) n; // unused parameter
+    ggml_type type = t->type;
+    int64_t * ne = t->ne;
+    size_t * nb = t->nb;
+    double sum = 0.0;
+    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+                    float v;
+                    if (type == GGML_TYPE_F16) {
+                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+                    } else if (type == GGML_TYPE_F32) {
+                        v = *(float *) &data[i];
+                    } else if (type == GGML_TYPE_I32) {
+                        v = (float) *(int32_t *) &data[i];
+                    } else if (type == GGML_TYPE_I16) {
+                        v = (float) *(int16_t *) &data[i];
+                    } else if (type == GGML_TYPE_I8) {
+                        v = (float) *(int8_t *) &data[i];
+                    } else {
+                        GGML_ABORT("fatal error");
+                    }
+                    sum += v;
+                }
+            }
+        }
+    }
+    printf("%s.sum = %.6f\n", t->name, sum);
+}
+
 static void print_tensor_data(ggml_tensor * t, uint8_t * data, int64_t n) {
     ggml_type type = t->type;
     int64_t * ne = t->ne;
     size_t * nb = t->nb;
+    printf("%s.data: [\n", t->name);
     for (int64_t i3 = 0; i3 < ne[3]; i3++) {
-        printf("%s.data: [\n", t->name);
+        if (i3 == n && ne[3] > 2*n) {
+            printf(" ..., \n");
+            i3 = ne[3] - n;
+        }
+        printf(" [\n");
         for (int64_t i2 = 0; i2 < ne[2]; i2++) {
             if (i2 == n && ne[2] > 2*n) {
                 printf(" ..., \n");
@@ -500,6 +566,120 @@ static void print_tensor_data(ggml_tensor * t, uint8_t * data, int64_t n) {
         }
         printf(" ]\n");
     }
+    printf(" ]\n");
+}
+
+static void save_tensor_to_file(const struct ggml_tensor * tensor, const uint8_t * data_ptr) {
+    char filename[512];
+    snprintf(filename, sizeof(filename), "%s_cpp.txt", tensor->name);
+
+    FILE * f = fopen(filename, "w");
+    if (!f) {
+        fprintf(stderr, "Failed to open %s\n", filename);
+        return;
+    }
+
+    // Check tensor size and warn if too large
+    int64_t total_elements = ggml_nelements(tensor);
+    fprintf(stderr, "Saving tensor %s (%lld elements) to %s\n",
+            tensor->name, (long long)total_elements, filename);
+
+    if (total_elements > 10000000) { // 10M elements
+        fprintf(stderr, "Warning: tensor is very large (%lld elements), this may take time\n",
+                (long long)total_elements);
+    }
+
+    const uint8_t * data = (data_ptr) ? data_ptr : (uint8_t *) tensor->data;
+    ggml_type type = tensor->type;
+    const int64_t * ne = tensor->ne;
+    const size_t * nb = tensor->nb;
+
+    // Use a buffer to reduce I/O calls
+    const size_t BUF_SIZE = 8192;
+    char * buf = (char *) malloc(BUF_SIZE);
+    if (!buf) {
+        fprintf(stderr, "Failed to allocate buffer\n");
+        fclose(f);
+        return;
+    }
+    size_t buf_pos = 0;
+
+    // Helper lambda to flush buffer
+    auto flush_buf = [&]() {
+        if (buf_pos > 0) {
+            fwrite(buf, 1, buf_pos, f);
+            buf_pos = 0;
+        }
+    };
+
+    // Helper to append to buffer
+    auto append = [&](const char * str, size_t len) {
+        if (buf_pos + len >= BUF_SIZE) {
+            flush_buf();
+        }
+        if (len >= BUF_SIZE) {
+            // String too large for buffer, write directly
+            fwrite(str, 1, len, f);
+        } else {
+            memcpy(buf + buf_pos, str, len);
+            buf_pos += len;
+        }
+    };
+
+    auto append_str = [&](const char * str) {
+        append(str, strlen(str));
+    };
+
+    char num_buf[32];
+
+    // Write header once for all batches
+    append_str(tensor->name);
+    append_str(".data: [\n");
+
+    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
+        append_str(" [\n"); // Start of batch
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            append_str(" [\n");
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                append_str(" [");
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+                    float v;
+                    if (type == GGML_TYPE_F16) {
+                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+                    } else if (type == GGML_TYPE_F32) {
+                        v = *(float *) &data[i];
+                    } else if (type == GGML_TYPE_I32) {
+                        v = (float) *(int32_t *) &data[i];
+                    } else if (type == GGML_TYPE_I16) {
+                        v = (float) *(int16_t *) &data[i];
+                    } else if (type == GGML_TYPE_I8) {
+                        v = (float) *(int8_t *) &data[i];
+                    } else {
+                        GGML_ABORT("fatal error");
+                    }
+                    int len = snprintf(num_buf, sizeof(num_buf), "%8.4f", v);
+                    append(num_buf, len);
+                    if (i0 < ne[0] - 1) append_str(", ");
+                }
+                append_str("],\n");
+            }
+            append_str(" ],\n");
+        }
+        append_str(" ]"); // End of batch
+        if (i3 < ne[3] - 1) {
+            append_str(",\n"); // Comma between batches
+        } else {
+            append_str("\n");
+        }
+    }
+
+    append_str("]\n"); // Close the top-level array
+
+    flush_buf();
+    free(buf);
+    fclose(f);
+    fprintf(stderr, "Tensor saved successfully\n");
 }

 //
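
Taken together, to_ne_string, print_tensor_info, print_tensor_sum, and save_tensor_to_file form a small tensor-debugging toolkit. A minimal sketch of how they might be combined into a per-tensor dump (the wrapper name clip_debug_dump is hypothetical; it assumes t is an op node whose data is resident in host memory, since print_tensor_info dereferences t->src[0] unconditionally):

// Hypothetical wrapper over the debug helpers added above.
static void clip_debug_dump(ggml_tensor * t) {
    print_tensor_info(t);                         // op name, type, source shapes
    print_tensor_shape(t);                        // "<name>.shape = [...]"
    print_tensor_sum(t, (uint8_t *) t->data, 3);  // scalar sum over all elements
    save_tensor_to_file(t, nullptr);              // full dump to "<name>_cpp.txt"
}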
