-
Notifications
You must be signed in to change notification settings - Fork 14
bitonic sort sample #209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
bitonic sort sample #209
Changes from all commits
fd346a0
547e518
6544e04
8de0c0f
9923294
800802b
446d487
2f3126f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| # Pull in the shared helper module; RES holds the include() result so a missing module can be detected. | ||
| include(common RESULT_VARIABLE RES) | ||
| if(NOT RES) | ||
| message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") | ||
| endif() | ||
|
|
||
| # Create the executable project for this example; only the PCH target argument is supplied. | ||
| nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") | ||
|
|
||
| # Optional step: embed the files under app_resources/ and link them to the executable. | ||
| if(NBL_EMBED_BUILTIN_RESOURCES) | ||
| set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) | ||
| set(RESOURCE_DIR "app_resources") | ||
|
|
||
| # Absolute paths: the search root, plus (presumably) where generated sources and headers are written. | ||
| get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) | ||
|
|
||
| # Collect every file under the resource directory, as paths relative to that directory. | ||
| file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") | ||
| foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) | ||
| LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") | ||
| endforeach() | ||
|
|
||
| # Build the resource target from the collected list; "nbl::this_example::builtin" is presumably its C++ namespace. | ||
| ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") | ||
|
|
||
| LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) | ||
| endif() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| #include "common.hlsl" | ||
| #include "nbl/builtin/hlsl/workgroup/basic.hlsl" | ||
| #include "nbl/builtin/hlsl/workgroup/bitonic_sort.hlsl" | ||
|
|
||
| // Push constants for this shader; PushConstantData comes from the included common.hlsl. | ||
| [[vk::push_constant]] PushConstantData pushConstants; | ||
|
|
||
| using namespace nbl::hlsl; | ||
|
|
||
| // Sort configuration: the two log2 sizes from common.hlsl, uint32_t for both type arguments, less<uint32_t> as comparator. | ||
| using BitonicSortConfig = workgroup::bitonic_sort::bitonic_sort_config<ElementsPerThreadLog2, WorkgroupSizeLog2, uint32_t, uint32_t, less<uint32_t> >; | ||
|
|
||
| NBL_CONSTEXPR uint32_t WorkgroupSize = BitonicSortConfig::WorkgroupSize; | ||
|
|
||
| // Groupshared storage backing SharedMemoryAccessor, sized by the sort configuration. | ||
| groupshared uint32_t sharedmem[BitonicSortConfig::SharedmemDWORDs]; | ||
|
|
||
| // Reports the workgroup size as (WorkgroupSize, 1, 1), the same extents used in main()'s numthreads attribute. | ||
| uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(uint32_t(BitonicSortConfig::WorkgroupSize), 1, 1); } | ||
|
|
||
| // Thin accessor over the `sharedmem` groupshared array, handed to the workgroup sort as its scratch storage. | ||
| struct SharedMemoryAccessor | ||
| { | ||
| // Workgroup-wide barrier, forwarded to glsl::barrier(). | ||
| void workgroupExecutionAndMemoryBarrier() | ||
| { | ||
| glsl::barrier(); | ||
| } | ||
|
|
||
| // Reads sharedmem[idx] into `value`. | ||
| template <typename AccessType, typename IndexType> | ||
| void get(IndexType idx, NBL_REF_ARG(AccessType) value) | ||
| { | ||
| value = sharedmem[idx]; | ||
| } | ||
|
|
||
| // Writes `value` to sharedmem[idx]. | ||
| template <typename AccessType, typename IndexType> | ||
| void set(IndexType idx, AccessType value) | ||
| { | ||
| sharedmem[idx] = value; | ||
| } | ||
| }; | ||
|
|
||
| // Reads and writes elements through a raw 64-bit buffer address using vk::RawBufferLoad / vk::RawBufferStore. | ||
| // NOTE(review): a reviewer on this change mentions ready-made BDA accessors may already exist; consider using one instead. | ||
| struct Accessor | ||
| { | ||
| // Builds an accessor whose element 0 is located at `address`. | ||
| static Accessor create(const uint64_t address) | ||
| { | ||
| Accessor accessor; | ||
| accessor.address = address; | ||
| return accessor; | ||
| } | ||
|
|
||
| // Loads element `index` (in units of AccessType) into `value`. | ||
| template <typename AccessType, typename IndexType> | ||
| void get(const IndexType index, NBL_REF_ARG(AccessType) value) | ||
| { | ||
| // Widen the index first so the byte offset is computed in 64 bits and cannot wrap in a narrower IndexType. | ||
| value = vk::RawBufferLoad<AccessType>(address + uint64_t(index) * sizeof(AccessType)); | ||
| } | ||
|
|
||
| // Stores `value` as element `index` (in units of AccessType). | ||
| template <typename AccessType, typename IndexType> | ||
| void set(const IndexType index, const AccessType value) | ||
| { | ||
| // Same 64-bit offset computation as get(). | ||
| vk::RawBufferStore<AccessType>(address + uint64_t(index) * sizeof(AccessType), value); | ||
| } | ||
|
|
||
| // Base address of the buffer, in bytes. | ||
| uint64_t address; | ||
| }; | ||
|
|
||
| // Compute entry point: runs the workgroup bitonic sort over the buffer addressed by the push constants. | ||
| [numthreads(BitonicSortConfig::WorkgroupSize, 1, 1)] | ||
| [shader("compute")] | ||
| void main() | ||
| { | ||
| SharedMemoryAccessor scratchAccessor; | ||
| Accessor bufferAccessor = Accessor::create(pushConstants.deviceBufferAddress); | ||
|
|
||
| // No explicit load/store here: the sort goes through the two accessors itself. | ||
| workgroup::BitonicSort<BitonicSortConfig>::template __call<Accessor, SharedMemoryAccessor>(bufferAccessor, scratchAccessor); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| #ifndef _BITONIC_SORT_COMMON_INCLUDED_ | ||
| #define _BITONIC_SORT_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| // Push-constant block for the sort shader. | ||
| struct PushConstantData | ||
| { | ||
| // 64-bit address of the buffer the shader reads from and writes to. | ||
| uint64_t deviceBufferAddress; | ||
| }; | ||
|
|
||
| // NOTE(review): reviewers suggest 9 (512 threads) for better residency; elementCount is derived from this value, so it would change too. | ||
| NBL_CONSTEXPR uint32_t WorkgroupSizeLog2 = 10; // 1024 threads (2^10) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 512 is optimal residency on all GPUs
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. so log2 of 9 |
||
| NBL_CONSTEXPR uint32_t ElementsPerThreadLog2 = 2; // 4 elements per thread (2^2): several elements per invocation, which is not virtual threading | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wouldn't really call that virtual threads; virtual threads is when you are able to make a workgroup of size, let's say, 512 behave as if it's 4096. Processing multiple elements per invocation is an orthogonal extra to that, and it helps with |
||
| NBL_CONSTEXPR uint32_t elementCount = uint32_t(1) << (WorkgroupSizeLog2 + ElementsPerThreadLog2); // 4096 elements (2^12) | ||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| { | ||
| "enableParallelBuild": true, | ||
| "threadsPerBuildProcess" : 2, | ||
| "isExecuted": false, | ||
| "scriptPath": "", | ||
| "cmake": { | ||
| "configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
| "buildModes": [], | ||
| "requiredOptions": [] | ||
| }, | ||
| "profiles": [ | ||
| { | ||
| "backend": "vulkan", // should be none | ||
| "platform": "windows", | ||
| "buildModes": [], | ||
| "runConfiguration": "Release", // we also need to run in Debug and RelWithDebInfo because this is a foundational example | ||
| "gpuArchitectures": [] | ||
| } | ||
| ], | ||
| "dependencies": [], | ||
| "data": [ | ||
| { | ||
| "dependencies": [], | ||
| "command": [""], | ||
| "outputs": [] | ||
| } | ||
| ] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there's readymade BDA accessors you can use AFAIK