Commit 61bde8e
authored
vulkan: Reduce temporary memory usage for TOP_K (#17623)
- Compute row size for the temp buffer based on the output of the first pass.
- Update shader addressing math to use the output row size
- Pass the output row size as "ncols_output"; what used to be "ncols_output" is now "k".
For the common case of K=40 and src0=(200000,1,1,1), this reduces the temporary buffer
from about 3.2MB to 500KB.
1 parent e251e5e commit 61bde8e
File tree
3 files changed
+54
-27
lines changed
- ggml/src/ggml-vulkan
- vulkan-shaders
3 files changed
+54
-27
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1227 | 1227 | | |
1228 | 1228 | | |
1229 | 1229 | | |
| 1230 | + | |
1230 | 1231 | | |
1231 | 1232 | | |
1232 | 1233 | | |
| |||
1673 | 1674 | | |
1674 | 1675 | | |
1675 | 1676 | | |
| 1677 | + | |
| 1678 | + | |
| 1679 | + | |
| 1680 | + | |
| 1681 | + | |
| 1682 | + | |
| 1683 | + | |
| 1684 | + | |
1676 | 1685 | | |
1677 | 1686 | | |
1678 | 1687 | | |
| |||
10345 | 10354 | | |
10346 | 10355 | | |
10347 | 10356 | | |
10348 | | - | |
| 10357 | + | |
10349 | 10358 | | |
10350 | | - | |
10351 | | - | |
10352 | | - | |
10353 | | - | |
10354 | | - | |
10355 | | - | |
10356 | | - | |
10357 | | - | |
10358 | | - | |
10359 | 10359 | | |
10360 | 10360 | | |
10361 | 10361 | | |
| |||
10370 | 10370 | | |
10371 | 10371 | | |
10372 | 10372 | | |
| 10373 | + | |
| 10374 | + | |
10373 | 10375 | | |
10374 | | - | |
10375 | 10376 | | |
10376 | 10377 | | |
10377 | 10378 | | |
| |||
10411 | 10412 | | |
10412 | 10413 | | |
10413 | 10414 | | |
| 10415 | + | |
| 10416 | + | |
| 10417 | + | |
| 10418 | + | |
| 10419 | + | |
| 10420 | + | |
| 10421 | + | |
| 10422 | + | |
| 10423 | + | |
| 10424 | + | |
| 10425 | + | |
| 10426 | + | |
| 10427 | + | |
| 10428 | + | |
| 10429 | + | |
10414 | 10430 | | |
10415 | 10431 | | |
10416 | 10432 | | |
| |||
10436 | 10452 | | |
10437 | 10453 | | |
10438 | 10454 | | |
| 10455 | + | |
10439 | 10456 | | |
10440 | 10457 | | |
10441 | 10458 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
19 | 19 | | |
20 | 20 | | |
21 | 21 | | |
| 22 | + | |
22 | 23 | | |
23 | 24 | | |
24 | 25 | | |
| |||
36 | 37 | | |
37 | 38 | | |
38 | 39 | | |
39 | | - | |
| 40 | + | |
40 | 41 | | |
41 | 42 | | |
42 | 43 | | |
43 | 44 | | |
44 | 45 | | |
45 | 46 | | |
46 | 47 | | |
47 | | - | |
| 48 | + | |
48 | 49 | | |
49 | 50 | | |
50 | 51 | | |
| |||
84 | 85 | | |
85 | 86 | | |
86 | 87 | | |
87 | | - | |
| 88 | + | |
88 | 89 | | |
89 | | - | |
90 | | - | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
91 | 94 | | |
92 | | - | |
93 | | - | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
| 98 | + | |
94 | 99 | | |
95 | 100 | | |
96 | 101 | | |
| |||
Lines changed: 14 additions & 9 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
25 | 25 | | |
26 | 26 | | |
27 | 27 | | |
| 28 | + | |
28 | 29 | | |
29 | 30 | | |
30 | 31 | | |
| |||
60 | 61 | | |
61 | 62 | | |
62 | 63 | | |
63 | | - | |
| 64 | + | |
64 | 65 | | |
65 | 66 | | |
66 | 67 | | |
67 | 68 | | |
68 | 69 | | |
69 | 70 | | |
70 | 71 | | |
71 | | - | |
| 72 | + | |
72 | 73 | | |
73 | 74 | | |
74 | 75 | | |
| |||
98 | 99 | | |
99 | 100 | | |
100 | 101 | | |
101 | | - | |
| 102 | + | |
102 | 103 | | |
103 | 104 | | |
104 | 105 | | |
| |||
139 | 140 | | |
140 | 141 | | |
141 | 142 | | |
142 | | - | |
| 143 | + | |
143 | 144 | | |
144 | 145 | | |
145 | 146 | | |
| |||
179 | 180 | | |
180 | 181 | | |
181 | 182 | | |
182 | | - | |
| 183 | + | |
183 | 184 | | |
184 | | - | |
185 | | - | |
| 185 | + | |
| 186 | + | |
| 187 | + | |
| 188 | + | |
186 | 189 | | |
187 | | - | |
188 | | - | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
189 | 194 | | |
190 | 195 | | |
191 | 196 | | |
| |||
0 commit comments