Skip to content

Commit 3683721

Browse files
committed
minor tweak
1 parent adb72c5 commit 3683721

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -394,18 +394,18 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
394 394
ne01n = ne01*ne02;
395 395
ne02n = 1;
396 396
}
397-
ne00 = ne00n ;
397+
ne00 = ne00n;
398 398
ne01 = ne01n;
399 399
ne02 = ne02n;
400 400
can_be_transposed = true;
401 401
}
402-
if ((nb02 == (int64_t)ggml_element_size(src0) &&
402+
if ((nb02 == (int64_t)ggml_element_size(src0) && nb00 <= nb01 &&
403 403
nb01 == ne02 * ne00 * (int64_t)ggml_element_size(src0))) {
404-
GGML_ASSERT(nb00 <= nb01);
404+
// GGML_ASSERT(nb00 <= nb01);
405 405
ne00n = ne00*ne01;
406 406
ne01n = ne02;
407 407
ne02n = 1; // not used
408-
ne00 = ne00n ;
408+
ne00 = ne00n;
409 409
ne01 = ne01n;
410 410
ne02 = ne02n;
411 411
can_be_transposed = true;

tests/test-backend-ops.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7928,6 +7928,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
7928 7928
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
7929 7929
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
7930 7930

7931+
// sd.cpp cases
7931 7932
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
7932 7933
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
7933 7934
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {21504, 4352, 1, 1}, {2, 0, 1, 3}, {0, 0, 0, 0}));

0 commit comments

Comments
 (0)