Skip to content

Commit 90c72a6

Browse files
authored
ggml : extend the GGML_SCHED_NO_REALLOC debug logic of the scheduler (ggml-org#17617)
1 parent 6eea666 commit 90c72a6

File tree

1 file changed

+33
-4
lines changed

1 file changed

+33
-4
lines changed

ggml/src/ggml-backend.cpp

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -723,6 +723,12 @@ struct ggml_backend_sched {
723723
bool op_offload;
724724

725725
int debug;
726+
727+
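// used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]: 0 = disabled (default), 1 = abort when the graph is reallocated although its size and backend ids did not change (default if GGML_SCHED_NO_REALLOC is defined), >1 = abort on any graph reallocation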
728+
// ref: https://github.com/ggml-org/llama.cpp/pull/17617
729+
int debug_realloc;
730+
int debug_graph_size;
731+
int debug_prev_graph_size;
726732
};
727733

728734
// Shorthand for calling ggml_hash_find_or_insert on the scheduler's hash set for `tensor`.
// NOTE: expands to an expression that references `sched`, so a variable with that name
// must be in scope wherever the macro is used.
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
@@ -1289,6 +1295,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
12891295
}
12901296

12911297
int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
1298+
1299+
// remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
1300+
sched->debug_prev_graph_size = sched->debug_graph_size;
1301+
sched->debug_graph_size = graph_size;
1302+
12921303
if (sched->graph.size < graph_size) {
12931304
sched->graph.size = graph_size;
12941305
sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
@@ -1395,14 +1406,21 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
13951406

13961407
// allocate graph
13971408
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
1398-
#ifdef GGML_SCHED_NO_REALLOC
1399-
GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
1400-
#endif
1401-
14021409
#ifndef NDEBUG
14031410
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
14041411
#endif
14051412

1413+
if (sched->debug_realloc > 0) {
1414+
// we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
1415+
// example: https://github.com/ggml-org/llama.cpp/pull/17143
1416+
const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
1417+
1418+
if (unexpected || sched->debug_realloc > 1) {
1419+
GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
1420+
sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
1421+
}
1422+
}
1423+
14061424
// the re-allocation may cause the split inputs to be moved to a different address
14071425
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
14081426
for (int i = 0; i < sched->n_backends; i++) {
@@ -1620,6 +1638,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
16201638

16211639
const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
16221640
sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
1641+
1642+
sched->debug_realloc = 0;
1643+
#ifdef GGML_SCHED_NO_REALLOC
1644+
sched->debug_realloc = 1;
1645+
#endif
1646+
const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
1647+
sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
1648+
16231649
sched->n_backends = n_backends;
16241650
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
16251651

@@ -1636,6 +1662,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
16361662
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
16371663
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
16381664

1665+
sched->debug_graph_size = 0;
1666+
sched->debug_prev_graph_size = 0;
1667+
16391668
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
16401669
sched->context_buffer = (char *) malloc(sched->context_buffer_size);
16411670

0 commit comments

Comments
 (0)