@@ -602,7 +602,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }
 
 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return t->data != NULL                 // tensor data already set externally
+        || t->buffer                       // tensor on external buffer (but not yet allocated)
+        || ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
 }
 
 // free the extra space at the end if the new tensor is smaller
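
For context, the rewritten ggml_gallocr_is_allocated() accepts three tensor states, mirroring the inline comments in the hunk above. A minimal sketch of how each state can come about, using ggml's public no_alloc context API (illustrative only, not part of this patch):

    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // tensor metadata only, no data allocation
    };
    struct ggml_context * ctx = ggml_init(params);
    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);

    // state 1: t->data set externally, e.g. via ggml_backend_tensor_alloc()
    // state 2: t->buffer set because the context's tensors were placed in a
    //          backend buffer, while t->data may not be assigned yet
    // state 3: neither is set; the tensor belongs to the graph allocator,
    //          which will allocate it (the ggml_gallocr_is_own() case)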
@@ -820,7 +822,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static bool ggml_gallocr_reserve_n_impl(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -925,23 +928,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
                 size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
                 if (cur_size > 0) {
                     GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
-                        __func__, ggml_backend_buft_name(galloc->bufts[i]),
-                        cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+                        __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
                 }
             }
 #endif
             ggml_vbuffer_free(galloc->buffers[i]);
-            galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
-            if (galloc->buffers[i] == NULL) {
-                GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
-                return false;
+            if (no_alloc) {
+                galloc->buffers[i] = NULL;
+            } else {
+                galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+                if (galloc->buffers[i] == NULL) {
+                    GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
+                    return false;
+                }
             }
         }
     }
 
     return true;
 }
 
+void ggml_gallocr_reserve_n_size(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+    GGML_ASSERT(ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc=*/ true));
+    for (int i = 0; i < galloc->n_buffers; i++) {
+        sizes[i] = 0;
+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+            sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+        }
+    }
+}
+
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+    return ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc=*/ false);
+}
+
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
     return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
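
Usage sketch for the new size-query entry point (not part of the patch; build_graph() is a hypothetical placeholder, the rest is ggml public API). ggml_gallocr_reserve_n_size() runs the same reservation planning as ggml_gallocr_reserve_n(), but with no_alloc=true, so the caller learns how large each compute buffer would be without any backend memory being allocated:

    #include <stdio.h>

    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
    ggml_gallocr_t galloc = ggml_gallocr_new(buft); // one buffer type -> one size entry

    struct ggml_cgraph * graph = build_graph(); // hypothetical graph builder

    size_t sizes[1]; // one entry per buffer type passed at galloc creation
    ggml_gallocr_reserve_n_size(galloc, graph, NULL, NULL, sizes);
    printf("compute buffer would need %zu bytes\n", sizes[0]);

    // once the size is acceptable, do the real reservation; this re-runs
    // the impl with no_alloc=false and allocates the buffers for real
    if (!ggml_gallocr_reserve(galloc, graph)) {
        fprintf(stderr, "compute buffer allocation failed\n");
    }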
@@ -1144,14 +1165,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
     return true;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
+        struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
     size_t max_size = ggml_backend_buft_get_max_size(buft);
 
     ggml_backend_buffer_t * buffers = NULL;
     size_t n_buffers = 0;
+    *nbytes_total = 0;
 
     size_t cur_buf_size = 0;
     struct ggml_tensor * first = ggml_get_first_tensor(ctx);
@@ -1163,10 +1186,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
         if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
-            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+            if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;
             }
             first = t;
+            *nbytes_total += cur_buf_size;
             cur_buf_size = this_size;
         } else {
             cur_buf_size += this_size;
@@ -1175,15 +1199,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
     // allocate remaining tensors
     if (cur_buf_size > 0) {
-        if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+        *nbytes_total += cur_buf_size;
+        if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
             return NULL;
         }
     }
 
+    if (no_alloc) {
+        return NULL;
+    }
+
     if (n_buffers == 0) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
+        GGML_ASSERT(!buffers);
         return NULL;
     }
 
@@ -1193,10 +1223,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     } else {
         buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
     }
-    free(buffers);
+    if (buffers) {
+        free(buffers); // can be NULL if context is empty or no_alloc
+    }
     return buffer;
 }
 
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
+    GGML_ASSERT(!buf);
+    return nbytes_total;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ false);
+}
+
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
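
Similarly, ggml_backend_alloc_ctx_tensors_from_buft_size() reports how many bytes the tensors of a context would occupy in a given buffer type without allocating them. A usage sketch, assuming the caller has obtained free_mem from the device by some other means (hypothetical variable; the ggml calls are public API):

    size_t need = ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    if (need <= free_mem) { // free_mem: device memory known to the caller
        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
        if (buf == NULL) {
            // allocation can still fail at this point, e.g. if memory was
            // claimed by another process in the meantime
        }
    } else {
        // fall back to another buffer type / device before allocating
    }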