We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a523479 commit 49a0651 (Copy full SHA for 49a0651)
src/llama-kv-cache-fp8.cpp
@@ -664,7 +664,12 @@ size_t llama_kv_cache_fp8::size_k_bytes() const {
664
size_t llama_kv_cache_fp8::size_v_bytes() const {
665
size_t size_v_bytes = 0;
666
for (const auto & layer : layers) {
667
- size_v_bytes += ggml_nbytes(layer.v_fp8) + ggml_nbytes(layer.v_scale);
+ if (layer.v_fp8 != nullptr) {
668
+ size_v_bytes += ggml_nbytes(layer.v_fp8);
669
+ }
670
+ if (layer.v_scale != nullptr) {
671
+ size_v_bytes += ggml_nbytes(layer.v_scale);
672
+ }
673
}
674
return size_v_bytes;
675
}
0 commit comments