@@ -723,6 +723,12 @@ struct ggml_backend_sched {
     bool op_offload;

     int debug;
+
+    // used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
+    // ref: https://github.com/ggml-org/llama.cpp/pull/17617
+    int debug_realloc;
+    int debug_graph_size;
+    int debug_prev_graph_size;
 };

 #define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
@@ -1289,6 +1295,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
     }

     int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
+
+    // remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
+    sched->debug_prev_graph_size = sched->debug_graph_size;
+    sched->debug_graph_size      = graph_size;
+
     if (sched->graph.size < graph_size) {
         sched->graph.size = graph_size;
         sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
@@ -1395,14 +1406,21 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {

     // allocate graph
     if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
-#ifdef GGML_SCHED_NO_REALLOC
-        GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
-#endif
-
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
 #endif

+        if (sched->debug_realloc > 0) {
+            // we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
+            // example: https://github.com/ggml-org/llama.cpp/pull/17143
+            const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
+
+            if (unexpected || sched->debug_realloc > 1) {
+                GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
+                    sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
+            }
+        }
+
         // the re-allocation may cause the split inputs to be moved to a different address
         // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
         for (int i = 0; i < sched->n_backends; i++) {
@@ -1620,6 +1638,14 @@ ggml_backend_sched_t ggml_backend_sched_new(

     const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
     sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
+
+    sched->debug_realloc = 0;
+#ifdef GGML_SCHED_NO_REALLOC
+    sched->debug_realloc = 1;
+#endif
+    const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
+    sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
+
     sched->n_backends = n_backends;
     sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;

@@ -1636,6 +1662,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
     sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
     sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));

+    sched->debug_graph_size      = 0;
+    sched->debug_prev_graph_size = 0;
+
     sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
     sched->context_buffer = (char *) malloc(sched->context_buffer_size);

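Note (not part of the patch above): as wired up in ggml_backend_sched_new, the check level defaults to 0, building with GGML_SCHED_NO_REALLOC raises the default to 1, and the GGML_SCHED_DEBUG_REALLOC environment variable, when set, overrides both at runtime. Level 1 aborts only on the "unexpected" case tested in ggml_backend_sched_alloc_splits (graph size unchanged and backend ids unchanged), while any level above 1 aborts on every graph reallocation. A minimal standalone sketch of that precedence, using a hypothetical helper name that exists neither in the patch nor in the ggml API:

#include <cstdlib>

// Hypothetical helper (illustration only) mirroring the precedence above:
// the compile-time GGML_SCHED_NO_REALLOC define raises the default level to 1,
// and the GGML_SCHED_DEBUG_REALLOC environment variable, if present, overrides
// it at runtime.
static int sched_debug_realloc_level() {
    int level = 0;              // 0: reallocation checks disabled
#ifdef GGML_SCHED_NO_REALLOC
    level = 1;                  // 1: abort only on unexpected reallocations
#endif
    if (const char * env = std::getenv("GGML_SCHED_DEBUG_REALLOC")) {
        level = std::atoi(env); // >1: abort on any graph reallocation
    }
    return level;
}

For example, running a ggml-based tool with GGML_SCHED_DEBUG_REALLOC=2 in the environment turns every graph reallocation into a hard abort, which makes otherwise silent reallocations easy to catch under a debugger.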