[BugFix] This PR aims to fix the precision issue of the LoRA feature i… #4046
Changes from all commits: b3f16d4, 536431b, 7b2b59d, af719cf, 9ec244e
First file (BGMVShrink kernel):

```diff
@@ -225,10 +225,8 @@ class BGMVShrink {
 }

 // declare all dtype kernel
-BGMV_SHRINK_TYPE_DECLARE(half)
-#if (__CCE_AICORE__ >= 220)
-BGMV_SHRINK_TYPE_DECLARE(bfloat16_t)
-#endif
+BGMV_SHRINK_TYPE_DECLARE(half);
+BGMV_SHRINK_TYPE_DECLARE(bfloat16_t);

 namespace vllm_ascend {
 extern void bgmv_shrink_impl(AscendType type, void* stream, void* x, void* weight, void* indices, uint32_t indicesSize,
```
```diff
@@ -240,10 +238,8 @@ extern void bgmv_shrink_impl(AscendType type, void* stream, void* x, void* weigh
         bgmv_shrink_half<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, y, batchSize, numTokensPerCore,
                                                         inputHiddenDim, maxLoRARank, scale);
     } else if (type == AscendType::BF16) {
-#if (__CCE_AICORE__ >= 220)
         bgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, y, batchSize, numTokensPerCore,
                                                               inputHiddenDim, maxLoRARank, scale);
-#endif
     } else {
         return;
     }
```

Contributor, on lines +241 to +242: You've removed the conditional compilation directive for the `bgmv_shrink_bfloat16_t` kernel call…
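For context, the `*_TYPE_DECLARE` macros presumably stamp out one kernel entry point per dtype. A hypothetical expansion might look like the sketch below; the parameter list mirrors `bgmv_shrink_impl` from the diff above, but the macro body is an assumption for illustration, not the repo's actual code:

```cpp
// Hypothetical expansion of BGMV_SHRINK_TYPE_DECLARE(T): an AscendC device
// entry point named bgmv_shrink_<T> for the given dtype token. Illustration
// only; the real macro in the repo will differ.
#define BGMV_SHRINK_TYPE_DECLARE(T)                                        \
    extern "C" __global__ __aicore__ void bgmv_shrink_##T(                 \
        __gm__ void* x, __gm__ void* weight, __gm__ void* indices,         \
        uint32_t indicesSize, __gm__ void* y, uint32_t batchSize,          \
        uint32_t numTokensPerCore, uint32_t inputHiddenDim,                \
        uint32_t maxLoRARank, float scale)
```

Because the macro is what brings `bgmv_shrink_bfloat16_t` into existence, any `#if (__CCE_AICORE__ >= 220)` guard around the declaration and the guard around the corresponding `<<<...>>>` launch have to agree; that mismatch is the thread running through the review comments on this page.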
Second file (SGMVExpand kernel):

```diff
@@ -356,10 +356,8 @@ class SGMVExpand {
 }

 // declare all dtype kernel
-SGMV_EXPAND_TYPE_DECLARE(half)
-#if (__CCE_AICORE__ >= 220)
-SGMV_EXPAND_TYPE_DECLARE(bfloat16_t)
-#endif
+SGMV_EXPAND_TYPE_DECLARE(half);
+SGMV_EXPAND_TYPE_DECLARE(bfloat16_t);

 namespace vllm_ascend {
 extern void sgmv_expand_impl(AscendType type, void* stream, void* x, void* weight,
```
```diff
@@ -375,12 +373,10 @@ extern void sgmv_expand_impl(AscendType type, void* stream, void* x, void* weigh
                                                           numTokensPerCore, maxLoRARank, outputHiddenDim, sliceOffset,
                                                           outputFullDim);
     } else if (type == AscendType::BF16) {
-#if (__CCE_AICORE__ >= 220)
         sgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
                                                               seqLen, seqLenSize, yIn, yOut, batchSize,
                                                               numTokensPerCore, maxLoRARank, outputHiddenDim,
                                                               sliceOffset, outputFullDim);
-#endif
     } else {
         return;
     }
```

Contributor, on lines +376 to +379: You've removed the conditional compilation directive for the `sgmv_expand_bfloat16_t` kernel call…
Third file (SGMVShrink kernel):

```diff
@@ -241,10 +241,8 @@ class SGMVShrink {
 }

 // declare all dtype kernel
-SGMV_SHRINK_TYPE_DECLARE(half)
-#if (__CCE_AICORE__ >= 220)
-SGMV_SHRINK_TYPE_DECLARE(bfloat16_t)
-#endif
+SGMV_SHRINK_TYPE_DECLARE(half);
+SGMV_SHRINK_TYPE_DECLARE(bfloat16_t);

 namespace vllm_ascend {
 extern void sgmv_shrink_impl(AscendType type, void* stream, void* x, void* weight,
```
```diff
@@ -260,13 +258,11 @@ extern void sgmv_shrink_impl(AscendType type, void* stream, void* x, void* weigh
                                                           numTokensPerCore, inputHiddenDim, maxLoRARank,
                                                           scale);
     } else if (type == AscendType::BF16) {
-#if (__CCE_AICORE__ >= 220)
         sgmv_shrink_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, loraIndices, loraIndicesSize,
                                                               seqLen, seqLenSize,
                                                               y, batchSize,
                                                               numTokensPerCore, inputHiddenDim, maxLoRARank,
                                                               scale);
-#endif
     } else {
         return;
     }
```

Contributor, on lines +261 to +265: You've removed the conditional compilation directive for the `sgmv_shrink_bfloat16_t` kernel call…
Contributor, on the BGMVExpand kernel file: You've removed the conditional compilation directive for the `bgmv_expand_bfloat16_t` kernel call, making it unconditional. However, the kernel's definition using `BGMV_EXPAND_TYPE_DECLARE(bfloat16_t)` at line 346 is still inside an `#if (__CCE_AICORE__ >= 220)` block. This will cause a compilation error on platforms where `__CCE_AICORE__ < 220`, as the function will be called but not defined. You should also remove the conditional compilation around the kernel declaration to fix this.
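To make the reviewer's point concrete, here is a minimal sketch of why the two guards must stay consistent. The names mirror the PR, but the macro body and the dispatch function are simplified placeholders, not the real Ascend code:

```cpp
// Simplified stand-in for the real macro: defines one trivial function per
// dtype token. The real BGMV_EXPAND_TYPE_DECLARE instantiates an Ascend
// kernel; this placeholder only models the "defined vs. not defined" issue.
#define BGMV_EXPAND_TYPE_DECLARE(T) void bgmv_expand_##T() { /* kernel body */ }

BGMV_EXPAND_TYPE_DECLARE(half)
#if (__CCE_AICORE__ >= 220)
BGMV_EXPAND_TYPE_DECLARE(bfloat16_t)  // bf16 kernel exists only on newer cores
#endif

void dispatch(bool isBF16) {
    if (isBF16) {
#if (__CCE_AICORE__ >= 220)
        // This guard matches the definition above. Dropping only this guard,
        // as the PR does, leaves a call to bgmv_expand_bfloat16_t on platforms
        // where __CCE_AICORE__ < 220, where the function was never defined:
        // exactly the compilation error the reviewer describes. Remove the
        // guard in both places, or keep it in both.
        bgmv_expand_bfloat16_t();
#endif
    } else {
        bgmv_expand_half();
    }
}
```

Under this reading, the three diffs shown above are internally consistent (they drop the guard around both the declaration and the call), while the bgmv_expand change flagged in the comment dropped it only at the call site.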