diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
index cc22595444..ab7a87c7dd 100644
--- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
@@ -69,7 +69,7 @@ NBL_CONCEPT_END(
 #include <nbl/builtin/hlsl/concepts/__end.hlsl>
 
 template<typename T, typename V, typename I=uint32_t>
-NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
+NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
new file mode 100644
index 0000000000..5509ce65c3
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
@@ -0,0 +1,160 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+
+// NOTE(review): include paths inferred from the names used below - confirm
+#include <nbl/builtin/hlsl/sampling/warp.hlsl>
+#include <nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// Samples a luminance image through its mip chain and maps the resulting UV through `PostWarp`.
+// `LuminanceAccessor` must provide `fetch(texel, mip)` and `gather(texel, mip)` as used by `binarySearch`.
+template<typename T, typename LuminanceAccessor, typename PostWarp NBL_PRIMARY_REQUIRES(is_scalar_v<T> && hierarchical_image::LuminanceReadAccessor<LuminanceAccessor> && concepts::WARP<PostWarp>)
+struct HierarchicalImage
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+    using vector3_type = vector<T, 3>;
+    using vector4_type = vector<T, 4>;
+
+    LuminanceAccessor accessor;
+    uint32_t2 lumaMapSize;
+    bool lumaAspect2x1;
+    // warp grid size minus one, see `create`
+    uint32_t2 lastWarpPixel;
+
+    // Bilinearly blends four corner UVs, warps the blend with `PostWarp` and writes the reciprocal pdf to `rcpPdf`.
+    // `dirsX`/`dirsY` hold the corners at texel offsets (0,1), (1,1), (1,0), (0,0), in that order.
+    // Only the fractional part of `unnormCoord` is used. Returns `PostWarp::warp(uv).dst`.
+    static vector3_type calculateSampleAndPdf(NBL_REF_ARG(scalar_type) rcpPdf, vector4_type dirsX, vector4_type dirsY, vector2_type unnormCoord, uint32_t2 lastWarpPixel)
+    {
+        // TODO(kevinyu): Convert float32_t to scalar_type
+        const float32_t2 interpolant = frac(unnormCoord);
+        const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY));
+
+        const float32_t2 xDiffs[] = {
+            uvs[2] - uvs[3],
+            uvs[1] - uvs[0]
+        };
+        const float32_t2 yVals[] = {
+            xDiffs[0] * interpolant.x + uvs[3],
+            xDiffs[1] * interpolant.x + uvs[0]
+        };
+        const float32_t2 yDiff = yVals[1] - yVals[0];
+        const float32_t2 uv = yDiff * interpolant.y + yVals[0];
+
+        // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity
+        // `warp` returns a WarpResult, the warped sample is its `dst` member
+        const float32_t3 L = PostWarp::warp(uv).dst;
+
+        const float32_t detInterpolJacobian = determinant(float32_t2x2(
+            lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx
+            yDiff // second column dFdy
+        ));
+
+        rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / PostWarp::forwardDensity(uv));
+
+        return L;
+    }
+
+    // Builds a sampler from its parts; `lastWarpPixel` is stored as `warpSize - 1`.
+    static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessor) accessor, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpSize)
+    {
+        HierarchicalImage result;
+        result.accessor = accessor;
+        result.lumaMapSize = lumaMapSize;
+        result.lumaAspect2x1 = lumaAspect2x1;
+        result.lastWarpPixel = warpSize - uint32_t2(1, 1);
+        return result;
+    }
+
+    // Walks the luminance mips from coarse to fine, picking one texel of every 2x2 footprint,
+    // and returns (chosen texel + xi) / lumaMapSize.
+    // Not static: it reads `accessor`, `lumaMapSize` and `lumaAspect2x1`.
+    vector2_type binarySearch(const vector2_type xi)
+    {
+        // NOTE(review): impl::choseSecond is declared outside this patch; a mutable copy is passed
+        // in case it rescales its last argument in place - confirm
+        float32_t2 u = xi;
+        uint32_t2 p = uint32_t2(0, 0);
+
+        // TODO(kevinyu): Implement findMSB
+        // function scope, the loop below needs it for both aspect ratios
+        // NOTE(review): assumes lumaMapSize.x is a power of two and at least 2 - confirm
+        const uint32_t mip2x1 = findMSB(lumaMapSize.x) - 1;
+
+        if (lumaAspect2x1)
+        {
+            // do one split in the X axis first cause penultimate full mip would have been 2x1
+            p.x = impl::choseSecond(accessor.fetch(uint32_t2(0, 0), mip2x1), accessor.fetch(uint32_t2(1, 0), mip2x1), u.x) ? 1 : 0;
+        }
+
+        for (uint32_t i = mip2x1; i != 0;)
+        {
+            --i;
+            p <<= 1;
+            const float32_t4 values = accessor.gather(p, i);
+            float32_t wx_0, wx_1;
+            {
+                const float32_t wy_0 = values[3] + values[2];
+                const float32_t wy_1 = values[1] + values[0];
+                if (impl::choseSecond(wy_0, wy_1, u.y))
+                {
+                    p.y |= 1;
+                    wx_0 = values[0];
+                    wx_1 = values[1];
+                }
+                else
+                {
+                    wx_0 = values[3];
+                    wx_1 = values[2];
+                }
+            }
+
+            if (impl::choseSecond(wx_0, wx_1, u.x))
+                p.x |= 1;
+        }
+
+        // TODO(kevinyu): Add some comment why we add xi.
+        const float32_t2 directionUV = (float32_t2(p.x, p.y) + u) / float32_t2(lumaMapSize);
+        return directionUV;
+    }
+
+    // Maps `xi` to a `PostWarp` sample and writes the reciprocal pdf to `rcpPdf`.
+    // The warp is evaluated on the fly at the four grid points surrounding `xi * lastWarpPixel`.
+    // NOTE(review): assumes the warp grid is at least 2x2 so that `lastWarpPixel` is non-zero - confirm
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi)
+    {
+        const float32_t2 lastPixel = float32_t2(lastWarpPixel);
+        const float32_t2 unnormCoord = xi * lastPixel;
+        // grid point `g` stands for the sample `g / lastWarpPixel`; offsets are in grid units and clamped to the last point
+        const float32_t2 lo = floor(unnormCoord);
+        const float32_t2 hi = min(lo + float32_t2(1, 1), lastPixel);
+        const float32_t2 dir0 = binarySearch(float32_t2(lo.x, hi.y) / lastPixel); // offset (0,1)
+        const float32_t2 dir1 = binarySearch(hi / lastPixel); // offset (1,1)
+        const float32_t2 dir2 = binarySearch(float32_t2(hi.x, lo.y) / lastPixel); // offset (1,0)
+        const float32_t2 dir3 = binarySearch(lo / lastPixel); // offset (0,0)
+
+        const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x);
+        const float32_t4 dirsY = float32_t4(dir0.y, dir1.y, dir2.y, dir3.y);
+
+        return calculateSampleAndPdf(rcpPdf, dirsX, dirsY, unnormCoord, lastWarpPixel);
+    }
+};
+
+//TODO(kevinyu): Implement cached warp map sampler
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl
new file mode 100644
index 0000000000..b8936c09f3
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl
@@ -0,0 +1,56 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl"
+#include "nbl/builtin/hlsl/fft/common.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// What `warp` returns: the warped point `dst` and a density value.
+template<typename C>
+struct WarpResult
+{
+    C dst;
+    float32_t density;
+};
+
+namespace concepts
+{
+
+// A warp `U` exposes `domain_type`, `codomain_type`, `warp(xi)`, `forwardDensity(xi)` and `backwardDensity(dst)`.
+// declare concept
+#define NBL_CONCEPT_NAME WARP
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (warper,U)
+#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type)
+#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_TYPE)(U::domain_type))
+    ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp<typename U::domain_type>(xi)) , ::nbl::hlsl::is_same_v, WarpResult<typename U::codomain_type>))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity<typename U::domain_type>(xi)) , ::nbl::hlsl::is_same_v, float32_t))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity<typename U::codomain_type>(dst)) , ::nbl::hlsl::is_same_v, float32_t))
+);
+#undef dst
+#undef xi
+#undef warper
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
new file mode 100644
index 0000000000..095e138d60
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
@@ -0,0 +1,67 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARPS_SPHERICAL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_WARPS_SPHERICAL_INCLUDED_
+
+// NOTE(review): include paths inferred from the names used below - confirm
+#include <nbl/builtin/hlsl/numbers.hlsl>
+#include <nbl/builtin/hlsl/type_traits.hlsl>
+#include <nbl/builtin/hlsl/sampling/warp.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace warp
+{
+
+// Maps a UV to the direction (cos(phi)*sin(theta), sin(phi)*sin(theta), cos(theta)) with phi = 2*pi*uv.x and theta = pi*uv.y.
+struct Spherical
+{
+    using domain_type = float32_t2;
+    using codomain_type = float32_t3;
+
+    // Returns the direction for `uv` together with the same density `forwardDensity(uv)` computes.
+    template<typename D NBL_FUNC_REQUIRES(is_same_v<D, domain_type>)
+    static WarpResult<codomain_type> warp(const D uv)
+    {
+        const float32_t phi = 2 * uv.x * numbers::pi<float32_t>;
+        const float32_t theta = uv.y * numbers::pi<float32_t>;
+        float32_t3 dir;
+        dir.x = cos(phi);
+        // |sin(phi)| recovered from cos(phi); the sign flips on the second half of the turn
+        dir.y = sqrt(1.f - dir.x * dir.x);
+        if (uv.x > 0.5f) dir.y = -dir.y;
+        const float32_t cosTheta = cos(theta);
+        // positive root: sin(theta) >= 0 as long as uv.y is in [0,1]
+        const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta);
+        dir.xy *= sinTheta;
+        dir.z = cosTheta;
+        WarpResult<codomain_type> warpResult;
+        warpResult.dst = dir;
+        warpResult.density = 1.f / (sinTheta * 2 * numbers::pi<float32_t> * numbers::pi<float32_t>);
+        return warpResult;
+    }
+
+    // Density at `uv`: 1 / (2 * pi^2 * sin(theta)).
+    template<typename D NBL_FUNC_REQUIRES(is_same_v<D, domain_type>)
+    static float32_t forwardDensity(const D uv)
+    {
+        const float32_t theta = uv.y * numbers::pi<float32_t>;
+        return 1.0f / (sin(theta) * 2 * numbers::pi<float32_t> * numbers::pi<float32_t>);
+    }
+
+    // Same expression evaluated from a direction, using sin(theta) = sqrt(1 - dst.z^2).
+    template<typename C NBL_FUNC_REQUIRES(is_same_v<C, codomain_type>)
+    static float32_t backwardDensity(const C dst)
+    {
+        return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi<float32_t> * numbers::pi<float32_t>);
+    }
+};
+
+}
+}
+}
+}
+
+#endif