We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent adb72c5 · commit 3683721 (Copy full SHA for 3683721)
ggml/src/ggml-cuda/cpy.cu
@@ -394,18 +394,18 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
394
ne01n = ne01*ne02;
395
ne02n = 1;
396
}
397
- ne00 = ne00n ;
+ ne00 = ne00n;
398
ne01 = ne01n;
399
ne02 = ne02n;
400
can_be_transposed = true;
401
402
- if ((nb02 == (int64_t)ggml_element_size(src0) &&
+ if ((nb02 == (int64_t)ggml_element_size(src0) && nb00 <= nb01 &&
403
nb01 == ne02 * ne00 * (int64_t)ggml_element_size(src0))) {
404
- GGML_ASSERT(nb00 <= nb01);
+ // GGML_ASSERT(nb00 <= nb01);
405
ne00n = ne00*ne01;
406
ne01n = ne02;
407
ne02n = 1; // not used
408
409
410
411
tests/test-backend-ops.cpp
@@ -7928,6 +7928,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
7928
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
7929
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
7930
7931
+ // sd.cpp cases
7932
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
7933
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
7934
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {21504, 4352, 1, 1}, {2, 0, 1, 3}, {0, 0, 0, 0}));
0 commit comments