@@ -29,8 +29,6 @@ file(GLOB kernel_primitive_h "primitive/*.h")
2929file (
3030 GLOB kernel_cu
3131 RELATIVE "${CMAKE_CURRENT_SOURCE_DIR} "
32- "gpu/*.cu"
33- "gpu/*.cu.cc"
3432 "gpudnn/*.cu"
3533 "kps/*.cu"
3634 "legacy/kps/*.cu"
@@ -40,18 +38,24 @@ file(
4038 "strings/gpu/*.cu"
4139 "fusion/gpu/*.cu" )
4240
41+ file (
42+ GLOB kernel_gpu
43+ RELATIVE "${CMAKE_CURRENT_SOURCE_DIR} "
44+ "gpu/*.cu" "gpu/*.cu.cc" )
45+
4346if (APPLE OR WIN32 )
4447 list (REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu" )
4548 list (REMOVE_ITEM kernel_cu "sparse/gpu/conv_kernel_igemm.cu" )
4649endif ()
4750
4851if (NOT WITH_DGC)
49- list (REMOVE_ITEM kernel_cu "gpu/dgc_kernel.cu" )
52+ list (REMOVE_ITEM kernel_gpu "gpu/dgc_kernel.cu" )
5053endif ()
5154
5255if (DEFINED REDUCE_INFERENCE_LIB_SIZE)
53- list (FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\ .cc$" )
5456 list (FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\ .cu$" )
57+ list (FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\ .cc$" )
58+ list (FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\ .cu$" )
5559endif ()
5660
5761if (WITH_CUTLASS)
@@ -216,6 +220,15 @@ if(WITH_ROCM)
216220 list (
217221 REMOVE_ITEM
218222 kernel_cu
223+ "gpudnn/mha_cudnn_frontend.cu"
224+ "fusion/gpu/blha_get_max_len.cu"
225+ "fusion/gpu/block_multi_head_attention_kernel.cu"
226+ "fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
227+ "fusion/gpu/fused_bn_add_activation_kernel.cu"
228+ "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu" )
229+ list (
230+ REMOVE_ITEM
231+ kernel_gpu
219232 "gpu/affine_grid_grad_kernel.cu"
220233 "gpu/apply_per_channel_scale_kernel.cu"
221234 "gpu/cholesky_solve_kernel.cu"
@@ -228,13 +241,7 @@ if(WITH_ROCM)
228241 "gpu/put_along_axis_grad_kernel.cu"
229242 "gpu/put_along_axis_kernel.cu"
230243 "gpu/qr_kernel.cu"
231- "gpu/svd_kernel.cu"
232- "gpudnn/mha_cudnn_frontend.cu"
233- "fusion/gpu/blha_get_max_len.cu"
234- "fusion/gpu/block_multi_head_attention_kernel.cu"
235- "fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
236- "fusion/gpu/fused_bn_add_activation_kernel.cu"
237- "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu" )
244+ "gpu/svd_kernel.cu" )
238245endif ()
239246
240247set (cc_search_pattern
@@ -291,6 +298,8 @@ file(
291298if (WITH_GPU OR WITH_ROCM)
292299 collect_srcs(kernels_srcs SRCS ${kernel_cu} )
293300 kernel_declare("${kernel_cu} " )
301+ collect_srcs(kernels_gpu_srcs SRCS ${kernel_gpu} )
302+ kernel_declare("${kernel_gpu} " )
294303endif ()
295304
296305if (WITH_XPU)
0 commit comments