Skip to content
Open

RWMC #218

Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions 31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#ifndef _NBL_HLSL_RWMC_INCLUDED_
#define _NBL_HLSL_RWMC_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
#include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>
#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>

namespace nbl
{
namespace hlsl
{
namespace rwmc
{

// Runtime parameters of the RWMC cascade consumed by RWMCCascadeAccumulator.
//
// `start` and `base` are stored as floats: the accumulator reads them straight
// into float scale variables and the renderer fills them from float push
// constants, so integer storage would silently drop any fractional part.
struct RWMCCascadeSettings
{
    // Number of cascade layers in use; bounds the layer search in addSample().
    uint32_t size;
    // Scale of the lowest cascade layer (initial `lowerScale` in addSample()).
    float start;
    // Multiplicative step between the scales of two consecutive layers.
    float base;
};

// Fixed-size storage for one value of CascadeLayerType per cascade layer.
// Used as RWMCCascadeAccumulator::output_storage_type.
template<typename CascadeLayerType, uint32_t CascadeSize>
struct CascadeEntry
{
// data[i] holds the value for cascade layer i, 0 <= i < CascadeSize.
CascadeLayerType data[CascadeSize];
};

// Accumulator that distributes every sample over two adjacent cascade layers,
// chosen by the sample's luma, and keeps a running mean per layer.
//
// Requires CascadeSize >= 2: addSample() always writes layer `lower` and
// layer `lower + 1`.
//
// NOTE(review): a layer is only renormalised when a sample lands in it, so a
// layer that was not touched by the most recent sample holds a mean over
// cascadeSampleCounter[i] samples rather than over all samples added so far.
// Confirm the consumer of `accumulation` accounts for that.
template<typename CascadeLayerType, uint32_t CascadeSize>
struct RWMCCascadeAccumulator
{
    using output_storage_type = CascadeEntry<CascadeLayerType, CascadeSize>;
    using initialization_data = RWMCCascadeSettings;

    // Per-layer running means.
    output_storage_type accumulation;
    // For every layer, the sample count (sampleIndex + 1) at its last update.
    uint32_t cascadeSampleCounter[CascadeSize];
    RWMCCascadeSettings cascadeSettings;

    // Zeroes every layer and its counter, then stores `settings`.
    void initialize(in RWMCCascadeSettings settings)
    {
        for (uint32_t i = 0u; i < CascadeSize; ++i)
        {
            accumulation.data[i] = (CascadeLayerType)0.0f;
            cascadeSampleCounter[i] = 0u;
        }

        cascadeSettings.size = settings.size;
        cascadeSettings.start = settings.start;
        cascadeSettings.base = settings.base;
    }

    // Dot product of `col` with row 1 of the transposed scRGBtoXYZ matrix
    // (presumably the Y / luminance weights - confirm against the colorspace header).
    typename vector_traits<CascadeLayerType>::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col)
    {
        return hlsl::dot<CascadeLayerType>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
    }

    // Adds `sample` as the (sampleIndex + 1)-th sample.
    // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp
    void addSample(uint32_t sampleIndex, float32_t3 sample)
    {
        float lowerScale = cascadeSettings.start;
        float upperScale = lowerScale * cascadeSettings.base;

        const float luma = getLuma(sample);

        // Never search past the storage we actually own, and never let the
        // unsigned `size - 2` wrap around when fewer than two layers are requested.
        const uint32_t usableCascadeCount = cascadeSettings.size < CascadeSize ? cascadeSettings.size : CascadeSize;
        const uint32_t lastLowerCascadeIndex = usableCascadeCount > 2u ? usableCascadeCount - 2u : 0u;

        // Walk up the cascade until luma falls below the upper scale or the top pair is reached.
        // `!(luma < upperScale)` (rather than `luma >= upperScale`) also sends NaN to the top pair.
        uint32_t lowerCascadeIndex = 0u;
        while (!(luma < upperScale) && lowerCascadeIndex < lastLowerCascadeIndex)
        {
            lowerScale = upperScale;
            upperScale *= cascadeSettings.base;
            ++lowerCascadeIndex;
        }

        float lowerCascadeLevelWeight;
        float higherCascadeLevelWeight;

        if (luma <= lowerScale)
            lowerCascadeLevelWeight = 1.0f; // at or below the lower scale: everything goes to the lower layer
        else if (luma < upperScale)
            lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale));
        else // luma >= upperScale, Inf, NaN ...
            lowerCascadeLevelWeight = 0.0f;

        if (luma < upperScale)
            higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight);
        else
            higherCascadeLevelWeight = upperScale / luma; // scale the sample down to the top layer's upper scale

        uint32_t higherCascadeIndex = lowerCascadeIndex + 1u;

        // Running-mean update that treats every sample since the layer's last
        // update as a zero contribution:
        //   mean_n = mean_c + (w * sample - (n - c) * mean_c) / n
        const uint32_t sampleCount = sampleIndex + 1u;
        const float reciprocalSampleCount = 1.0f / float(sampleCount);
        accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount;
        accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount;
        cascadeSampleCounter[lowerCascadeIndex] = sampleCount;
        cascadeSampleCounter[higherCascadeIndex] = sampleCount;
    }
};

}
}
}

#endif
42 changes: 33 additions & 9 deletions 31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
#include <nbl/builtin/hlsl/math/functions.hlsl>
#include <nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl>
#include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>

#include "rand_gen.hlsl"
#include "ray_gen.hlsl"
Expand Down Expand Up @@ -40,10 +41,33 @@ struct PathTracerCreationParams
BxDFCreation dielectricParams;
};

template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator>
// TODO: maybe implement a concept to ensure that OutputTypeVec is a vector?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes please concept needed!

// Accumulator that keeps a single running mean of all samples added to it.
template<typename OutputTypeVec>
struct DefaultAccumulator
{
    // This accumulator needs no configuration; the empty struct only exists so
    // every accumulator exposes an `initialization_data` type.
    struct DefaultAccumulatorInitializationSettings {};

    using output_storage_type = OutputTypeVec;
    using initialization_data = DefaultAccumulatorInitializationSettings;

    // Mean of the samples added so far.
    output_storage_type accumulation;

    // Resets the running mean to zero; `initializationData` is not read.
    void initialize(in initialization_data initializationData)
    {
        accumulation = (output_storage_type)0.0f;
    }

    // Folds `sample` into the mean as the (sampleIndex + 1)-th sample.
    void addSample(uint32_t sampleIndex, float32_t3 sample)
    {
        using scalar_t = typename vector_traits<OutputTypeVec>::scalar_type;
        scalar_t invSampleCount = 1.0 / (sampleIndex + 1);
        // Incremental mean: mean += (x - mean) / n
        accumulation += (sample - accumulation) * invSampleCount;
    }
};

template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator, class Accumulator>
struct Unidirectional
{
using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator>;
using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator, Accumulator>;
using randgen_type = RandGen;
using raygen_type = RayGen;
using intersector_type = Intersector;
Expand All @@ -53,6 +77,7 @@ struct Unidirectional
using scalar_type = typename MaterialSystem::scalar_type;
using vector3_type = vector<scalar_type, 3>;
using measure_type = typename MaterialSystem::measure_type;
using output_storage_type = typename Accumulator::output_storage_type;
using sample_type = typename NextEventEstimator::sample_type;
using ray_dir_info_type = typename sample_type::ray_dir_info_type;
using ray_type = typename RayGen::ray_type;
Expand Down Expand Up @@ -266,10 +291,11 @@ struct Unidirectional
}

// Li
measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene)
output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one suggestion to refactor, take sampleIndex (the i in the loop) and do the loop outside the path tracer (initialize the accumulator outside as well)

Also, I know it's not your code, but better rename depth to maxDepth

{
measure_type Li = (measure_type)0.0;
scalar_type meanLumaSq = 0.0;
Accumulator accumulator;
accumulator.initialize(accumulatorInitData);
//scalar_type meanLumaSq = 0.0;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

take the accumulator by reference from the outside, otherwise static polymorphism gets harder with stateful accumulators

Consider this scenario: I already have an accumulator, and I just want to add a few samples

for (uint32_t i = 0; i < numSamples; i++)
{
vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf?
Expand All @@ -290,16 +316,14 @@ struct Unidirectional
if (!hit)
missProgram(ray);

measure_type accumulation = ray.payload.accumulation;
scalar_type rcpSampleSize = 1.0 / (i + 1);
Li += (accumulation - Li) * rcpSampleSize;
accumulator.addSample(i, ray.payload.accumulation);

// TODO: visualize high variance

// TODO: russian roulette early exit?
}

return Li;
return accumulator.accumulation;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

take accumulator by reference and make the function void

}

NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u;
Expand Down
58 changes: 53 additions & 5 deletions 31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,32 @@
#define BXDF_COUNT 7

#include "render_common.hlsl"
#include "rwmc_global_settings_common.hlsl"

#ifdef RWMC_ENABLED
#include "RWMCCascadeAccumulator.hlsl"
#include "render_rwmc_common.hlsl"
#endif

// Push constants: the RWMC variant of the struct is used when RWMC_ENABLED is
// defined, the plain render variant otherwise. Both are declared under the name `pc`.
#ifdef RWMC_ENABLED
[[vk::push_constant]] RenderRWMCPushConstants pc;
#else
[[vk::push_constant]] RenderPushConstants pc;
#endif

// Shader resources. In [[vk::binding(X, Y)]] X is the binding index and Y the descriptor set.

// Environment map texture and its sampler, declared as one combined image sampler (set 2, binding 0).
[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused
[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler;

// Typed buffer of uint3 elements (set 2, binding 1).
[[vk::binding(1, 2)]] Buffer<uint3> sampleSequence;

// Scramble texture and its sampler, declared as one combined image sampler (set 2, binding 2).
[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused
[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler;

// Output images: the layered cascade target (set 1, binding 0) only exists in RWMC builds;
// outImage (set 0, binding 0) is always declared.
#ifdef RWMC_ENABLED
[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray<float32_t4> cascade;
#endif
[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

always use an array view (without RWMC you can just make the array 1 layer), and use the same binding

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you're only writing the image (not loading) you can skip declaring the format, because we use the Unformatted Storage extension

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

after taking another look at the code, i figured it's best to keep cascade and outImage in two separate descriptor sets. this way descriptor set 0 (the one with outImage only) can be reused across different shaders.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved everything to ds0


#include "pathtracer.hlsl"

using namespace nbl;
Expand Down Expand Up @@ -96,7 +122,14 @@ using raygen_type = ext::RayGen::Basic<ray_type>;
using intersector_type = ext::Intersector::Comprehensive<ray_type, light_type, bxdfnode_type>;
using material_system_type = ext::MaterialSystem::System<diffuse_bxdf_type, conductor_bxdf_type, dielectric_bxdf_type>;
using nee_type = ext::NextEventEstimator::Estimator<scene_type, ray_type, sample_t, aniso_interaction, ext::IntersectMode::IM_PROCEDURAL, LIGHT_TYPE, POLYGON_METHOD>;
using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type>;

// Pick the sample accumulator at compile time: the RWMC cascade accumulator
// (CascadeSize layers of float32_t3) when RWMC_ENABLED is defined, otherwise
// the plain running-mean accumulator.
#ifdef RWMC_ENABLED
using accumulator_type = rwmc::RWMCCascadeAccumulator<float32_t3, CascadeSize>;
#else
using accumulator_type = ext::PathTracer::DefaultAccumulator<float32_t3>;
#endif

// The accumulator is the last template argument of the path tracer.
using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type, accumulator_type>;

static const ext::Shape<ext::PST_SPHERE> spheres[SPHERE_COUNT] = {
ext::Shape<ext::PST_SPHERE>::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID),
Expand Down Expand Up @@ -129,7 +162,7 @@ static const ext::Shape<ext::PST_RECTANGLE> rectangles[1];
#endif

static const light_type lights[LIGHT_COUNT] = {
light_type::create(spectral_t(30.0,25.0,15.0),
light_type::create(LightEminence,
#ifdef SPHERE_LIGHT
8u,
#else
Expand Down Expand Up @@ -217,9 +250,24 @@ void main(uint32_t3 threadID : SV_DispatchThreadID)

pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams);

float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene);
float32_t4 pixCol = float32_t4(color, 1.0);
outImage[coords] = pixCol;
#ifdef RWMC_ENABLED
accumulator_type::initialization_data accumulatorInitData;
accumulatorInitData.size = CascadeSize;
accumulatorInitData.start = pc.start;
accumulatorInitData.base = pc.base;
accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData);
for (uint32_t i = 0; i < CascadeSize; ++i)
{
float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f);
cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry;
}
#else
accumulator_type::initialization_data accumulatorInitData;
float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData);
outImage[coords] = float32_t4(color, 1.0);
#endif



#ifdef PERSISTENT_WORKGROUPS
}
Expand Down
23 changes: 11 additions & 12 deletions 31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
#ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

struct SPushConstants
#ifndef __HLSL_VERSION
#include "matrix4SIMD.h"
#endif

struct RenderPushConstants
{
#ifdef __HLSL_VERSION
float32_t4x4 invMVP;
#else
nbl::core::matrix4SIMD invMVP;
#endif
Comment on lines 4 to 15

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no we use hlsl::float32_t4x4 in C++ and float32_t4x4 in HLSL only!

int sampleCount;
int depth;
};

[[vk::push_constant]] SPushConstants pc;

[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused
[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler;

[[vk::binding(1, 2)]] Buffer<uint3> sampleSequence;

[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused
[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler;

[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage;
NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f);

#endif
23 changes: 23 additions & 0 deletions 31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

#ifndef __HLSL_VERSION
#include "matrix4SIMD.h"
#endif

struct RenderRWMCPushConstants
{
#ifdef __HLSL_VERSION
float32_t4x4 invMVP;
#else
nbl::core::matrix4SIMD invMVP;
#endif
int sampleCount;
int depth;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make one push constant struct in terms of the other, preferably through composition, so

struct RenderRWMCPushConstants
{
   RenderPushConstants base;
   rwmc::SplattingParameters rwmc;
};

float start;
float base;
float kappa;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't use the same push constants for resolve and rendering & splatting

I really want you to pack up start and base into its own rwmc::SplattingParameters struct

};

#endif
Loading