-
Notifications
You must be signed in to change notification settings - Fork 1.2k
[feat]: add kunpeng kml support #1606
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,9 @@ if(USE_CONDA_TOOLCHAIN) | |
| endif() | ||
| set(CMAKE_C_COMPILER ${CONDA_CC} CACHE FILEPATH "C compiler" FORCE) | ||
| set(CMAKE_CXX_COMPILER ${CONDA_CXX} CACHE FILEPATH "C++ compiler" FORCE) | ||
| elseif(KTRANSFORMERS_CPU_USE_KML) | ||
| set(CMAKE_C_COMPILER "/opt/HPCKit/25.1.0/compiler/gcc/bin/gcc" CACHE FILEPATH "C compiler" FORCE) | ||
| set(CMAKE_CXX_COMPILER "/opt/HPCKit/25.1.0/compiler/gcc/bin/g++" CACHE FILEPATH "C++ compiler" FORCE) | ||
| else() | ||
| # Prefer system compilers explicitly to avoid accidentally picking conda wrappers from PATH | ||
| if(EXISTS "/usr/bin/gcc" AND EXISTS "/usr/bin/g++") | ||
|
|
@@ -417,6 +420,9 @@ elseif(KTRANSFORMERS_USE_MUSA) | |
| endif() | ||
| elseif(KTRANSFORMERS_CPU_USE_KML) | ||
| message(STATUS "KML CPU detected") | ||
| include_directories("/opt/HPCKit/25.1.0/kml/gcc/include") | ||
| link_directories(/opt/HPCKit/25.1.0/kml/gcc/lib) | ||
|
Comment on lines
+423
to
+424
Contributor
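There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These paths are also hardcoded, which harms portability. Please use the `HPCKIT_ROOT` variable here instead of the absolute `/opt/HPCKit/25.1.0/...` paths. Also, note that [… remainder of the comment was lost in extraction] |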
||
|
|
||
| else() | ||
| message(STATUS "No GPU support enabled, building for CPU only") | ||
| add_compile_definitions(KTRANSFORMERS_CPU_ONLY=1) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -870,7 +870,7 @@ struct GemmKernelInt4 { | |
| int n_end = std::min(n, N_BLOCK * (ith + 1)); | ||
| return {n_start, n_end}; | ||
| } | ||
| static std::pair<int, int> split_range_m(int m, int ith, int mth) { | ||
| static std::pair<int, int> split_range_m(int m, int ith, int mth = 0) { | ||
| int n_start = M_BLOCK * ith; | ||
| int n_end = std::min(m, M_BLOCK * (ith + 1)); | ||
| return {n_start, n_end}; | ||
|
|
@@ -1106,12 +1106,18 @@ struct GemmKernelInt4 { | |
| } | ||
| } | ||
| // 对第二个维度分块的 apply scale | ||
| static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB *bb, BufferC *bc, int ith, int nth, int block) { | ||
| static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB *bb, BufferC *bc, int ith, int nth, int block, int jth = -1) { | ||
| // printf("use split apply scale\n"); | ||
| auto [n_start, n_end] = split_range_n_block(n, ith, nth, block); | ||
| int m_start = 0, m_end = m; | ||
| if (jth != -1) { | ||
| auto tmp = split_range_m(m, jth); | ||
| m_start = tmp.first; | ||
| m_end = tmp.second; | ||
| } | ||
| // TODO: 后续用 SVE 来加速 | ||
| for (int m_begin = 0; m_begin < m; m_begin += M_STEP) { | ||
| for (int i = 0; i < M_STEP && m_begin + i < m; i++) { | ||
| for (int m_begin = m_start; m_begin < m_end; m_begin += M_STEP) { | ||
| for (int i = 0; i < M_STEP && m_begin + i < m_end; i++) { | ||
| float *scale_a = ba->get_scale(m, m_begin + i); | ||
|
Comment on lines
+1109
to
1121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change is also present in [… file name lost in extraction] |
||
| for (int n_begin = n_start; n_begin < n_end; n_begin += N_STEP) { | ||
| for (int j = 0; j < N_STEP && n_begin + j < n_end; j++) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hardcoding absolute paths for compilers makes the build configuration fragile and not portable. This will cause build failures on any machine with a different setup. It's better to define a variable for the root path of the toolkit and use it here.
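You should define `HPCKIT_ROOT` before this block, for example, near the other options:
`set(HPCKIT_ROOT "/opt/HPCKit/25.1.0" CACHE PATH "Root directory of the HPCKit installation")`
Then you can use it in this block (e.g. `${HPCKIT_ROOT}/compiler/gcc/bin/gcc`) and for the include/link directories later (e.g. `${HPCKIT_ROOT}/kml/gcc/include`, `${HPCKIT_ROOT}/kml/gcc/lib`).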