Autoexposure example restoration #728

Open: wants to merge 46 commits into base: master
Changes from 45 commits

Commits (46)
096e09d
Add luma_meter and tonemapper
nipunG314 Jul 19, 2024
4fd700f
Update submodule pointer
nipunG314 Jul 19, 2024
f93bb0f
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Jul 20, 2024
6152f96
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Jul 24, 2024
2311521
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Jul 26, 2024
52e7ab2
Convert morton.h to hlsl
nipunG314 Aug 1, 2024
1cc26bd
Fix HLSL morton code
nipunG314 Aug 2, 2024
6922d0c
Create geom_luma_meter and computeLuma
nipunG314 Aug 5, 2024
6e6eb64
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Aug 5, 2024
603a92f
Add gatherLuma method
nipunG314 Aug 7, 2024
810a6ac
Add getGatheredLuma()
nipunG314 Aug 8, 2024
69a73c1
Add reinhard and aces hlsl operators
nipunG314 Aug 8, 2024
72e0bc5
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Aug 13, 2024
4c70cf5
cast mask values to correct type
nipunG314 Aug 13, 2024
d9d6dd8
Add create methods to tonemapper params
nipunG314 Aug 16, 2024
305f7e7
Remove getGatheredLuma from luma_meter
nipunG314 Aug 16, 2024
3f4f6e9
Separate LumaMeteringWindow into a common header
nipunG314 Aug 20, 2024
515512a
Simplify luma_meter naming
nipunG314 Aug 20, 2024
77f5756
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Aug 20, 2024
1919e53
Simplify morton code
nipunG314 Aug 20, 2024
4c58238
Add missing comment
nipunG314 Aug 20, 2024
3c3f8b8
Refactor tonemapping operators
nipunG314 Aug 20, 2024
b0e0750
Small fixes
nipunG314 Aug 20, 2024
e8e46c9
Use promote to simplify code
nipunG314 Aug 21, 2024
ee5affe
Add static create to MeteringWindow
nipunG314 Aug 21, 2024
56389f4
Infer sample count from viewportSize
nipunG314 Aug 21, 2024
23771d1
Rename gatherLuma, add toXYZ method and templatize the float type
nipunG314 Aug 22, 2024
ac39039
Add uploadFloat, downloadFloat and gatherLuma
nipunG314 Aug 26, 2024
49a8049
Normalize tileOffset and coord to uv before computing Luma
nipunG314 Aug 27, 2024
8a10ae2
Simplify return statement
nipunG314 Sep 29, 2024
6b01b6d
Update submodule pointers
nipunG314 Dec 11, 2024
4129afe
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Dec 11, 2024
f95f1c1
Update submodule pointer
nipunG314 Dec 11, 2024
1a58273
Update submodule pointer
nipunG314 Dec 13, 2024
b6e1f57
Update submodule pointer
nipunG314 Dec 13, 2024
5239c29
Update submodule pointer
nipunG314 Jan 14, 2025
0df9ba6
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Jan 14, 2025
06c915e
stop rolling back my modules!
Jan 21, 2025
90d20c4
point submodule at head
Jan 21, 2025
26a4ed2
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314 Feb 22, 2025
4edd38c
Add capabilities for atomic ops
nipunG314 Mar 13, 2025
f1e3e98
Fix luma_meter
nipunG314 Mar 13, 2025
ce2ca41
Merge branch 'autoexposue_ex' of github.com:Devsh-Graphics-Programmin…
nipunG314 Mar 13, 2025
f1b7d17
Add median_luma_meter
nipunG314 Mar 16, 2025
83ac633
Update submodule pointer
nipunG314 Mar 16, 2025
2b5e502
Make changes to luma_meter
nipunG314 Mar 17, 2025
2 changes: 1 addition & 1 deletion examples_tests
Submodule examples_tests updated 52 files
+188 −0 11_FFT/README.md
+1 −0 11_FFT/app_resources/common.hlsl
+10 −6 11_FFT/app_resources/shader.comp.hlsl
+4 −1 11_FFT/main.cpp
+0 −12 23_Autoexposure/CMakeLists.txt
+0 −177 23_Autoexposure/main.cpp
+3 −3 25_FilterTest/main.cpp
+2 −1 26_Autoexposure/CMakeLists.txt
+68 −0 26_Autoexposure/app_resources/avg_luma_meter.comp.hlsl
+88 −0 26_Autoexposure/app_resources/avg_luma_tonemap.comp.hlsl
+28 −0 26_Autoexposure/app_resources/common.hlsl
+72 −0 26_Autoexposure/app_resources/median_luma_meter.comp.hlsl
+93 −0 26_Autoexposure/app_resources/median_luma_tonemap.comp.hlsl
+20 −0 26_Autoexposure/app_resources/present.frag.hlsl
+0 −0 26_Autoexposure/config.json.template
+1,134 −0 26_Autoexposure/main.cpp
+0 −0 26_Autoexposure/pipeline.groovy
+0 −11 26_Blur/app_resources/common.hlsl
+0 −160 26_Blur/app_resources/shader.comp.hlsl
+0 −782 26_Blur/main.cpp
+8 −7 28_FFTBloom/app_resources/common.hlsl
+9 −8 28_FFTBloom/app_resources/fft_common.hlsl
+76 −58 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl
+0 −7 28_FFTBloom/app_resources/image_fft_first_axis.hlsl
+0 −7 28_FFTBloom/app_resources/image_ifft_first_axis.hlsl
+0 −6 28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl
+2 −11 28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl
+2 −3 28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl
+55 −78 28_FFTBloom/main.cpp
+9 −9 30_ComputeShaderPathTracer/main.cpp
+0 −4 62_CAD/DrawResourcesFiller.cpp
+1 −11 62_CAD/DrawResourcesFiller.h
+1 −2 62_CAD/Hatch.cpp
+1 −3 62_CAD/Hatch.h
+1 −4 62_CAD/Polyline.cpp
+5 −44 62_CAD/Polyline.h
+7 −7 62_CAD/SingleLineText.cpp
+2 −3 62_CAD/SingleLineText.h
+33 −48 62_CAD/main.cpp
+0 −4 62_CAD/shaders/globals.hlsl
+0 −2 62_CAD/shaders/main_pipeline/common.hlsl
+0 −4 62_CAD/shaders/main_pipeline/fragment.hlsl
+23 −33 62_CAD/shaders/main_pipeline/fragment_shader.hlsl
+5 −3 62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl
+10 −22 62_CAD/shaders/main_pipeline/resolve_alphas.hlsl
+0 −5 62_CAD/shaders/main_pipeline/vertex_shader.hlsl
+1 −11 67_RayQueryGeometry/main.cpp
+0 −27 68_JpegLoading/CMakeLists.txt
+0 −193 68_JpegLoading/main.cpp
+0 −50 68_JpegLoading/pipeline.groovy
+10 −10 70_FLIPFluids/main.cpp
+2 −3 CMakeLists.txt
2 changes: 1 addition & 1 deletion include/nbl/asset/utils/IMeshPacker.h
@@ -6,7 +6,7 @@
#define __NBL_ASSET_I_MESH_PACKER_H_INCLUDED__

#include "nbl/asset/utils/IMeshManipulator.h"
#include "nbl/core/math/morton.h"
#include "nbl/builtin/hlsl/math/morton.hlsl"

namespace nbl
{
35 changes: 35 additions & 0 deletions include/nbl/builtin/hlsl/luma_meter/common.hlsl
@@ -0,0 +1,35 @@
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_
#define _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_

#include "nbl/builtin/hlsl/cpp_compat.hlsl"

namespace nbl
{
namespace hlsl
{
namespace luma_meter
{

struct MeteringWindow
{
using this_t = MeteringWindow;
float32_t2 meteringWindowScale;
float32_t2 meteringWindowOffset;

static this_t create(float32_t2 scale, float32_t2 offset) {
this_t retval;
retval.meteringWindowScale = scale;
retval.meteringWindowOffset = offset;
return retval;
}
};

}
}
}

#endif
293 changes: 293 additions & 0 deletions include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl
@@ -0,0 +1,293 @@
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_
#define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_

#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_arithmetic.hlsl"
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl"
#include "nbl/builtin/hlsl/type_traits.hlsl"
#include "nbl/builtin/hlsl/math/morton.hlsl"
#include "nbl/builtin/hlsl/luma_meter/common.hlsl"

namespace nbl
{
namespace hlsl
{
namespace luma_meter
{

template<uint32_t GroupSize, typename ValueAccessor, typename SharedAccessor, typename TexAccessor>
struct geom_meter {
using float_t = typename SharedAccessor::type;
using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;
Review comment (Member Author): even if doing color computation in float16_t, this doesn't free you from doing the texture coordinate calc in float32_t
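
A minimal illustration of that point (hypothetical, not part of the PR): only the luma math follows float_t, anything feeding texture coordinates stays 32-bit.

// illustrative only: coordinate math keeps full precision regardless of float_t
using coord_t2 = float32_t2; // used for uvPos / shiftedCoord style computations
// float_t and float_t3 stay conditional on SharedAccessor::type for the luma math itself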

using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
using this_t = geom_meter<GroupSize, ValueAccessor, SharedAccessor, TexAccessor>;

static this_t create(float_t2 lumaMinMax, float_t sampleCount)
{
this_t retval;
retval.lumaMinMax = lumaMinMax;
retval.sampleCount = sampleCount;
return retval;
}

float_t reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata)
{
return workgroup::reduction < plus < float_t >, GroupSize >::
template __call <SharedAccessor>(value, sdata);
}

float_t computeLumaLog2(
NBL_CONST_REF_ARG(MeteringWindow) window,
NBL_REF_ARG(TexAccessor) tex,
float_t2 shiftedCoord
)
{
float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
Review comment (Member Author): precompute a scale and offset from the Metering Window + the number of workgroups + the workgroup size, to apply to a uint16_t2 unnormalized coordinate.

Right now you have way too many variables being manipulated per-pixel (sketch below):

  • tile offset
  • viewport size
  • metering window
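
A hypothetical sketch of that consolidation (uvScale, uvBias and toUV are illustrative names, not from the PR): fold the metering window and viewport into one scale and bias computed once, so the per-pixel work is a single multiply-add on an unnormalized integer coordinate.

// precomputed once from the metering window, workgroup count and workgroup size, not per pixel
float32_t2 uvScale; // e.g. meteringWindowScale / viewportSize
float32_t2 uvBias;  // e.g. meteringWindowOffset

// global unnormalized pixel coordinate (tile offset already folded into unnormCoord)
float32_t2 toUV(uint16_t2 unnormCoord)
{
    // a single multiply-add per pixel
    return float32_t2(unnormCoord) * uvScale + uvBias;
}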

float_t3 color = tex.get(uvPos);
float_t luma = (float_t)TexAccessor::toXYZ(color);

luma = clamp(luma, lumaMinMax.x, lumaMinMax.y);

return max(log2(luma), log2(lumaMinMax.x));
Review comment (Member Author): why max? you already clamped!

Review comment (Member Author): btw, if you have the log2 already precomputed then you can do

return min(spirv::nMax(log2(luma), lumaMinLog2), lumaMaxLog2);

nMax is the special SPIR-V version of max that will return the other operand if one of them is NaN (which happens on log of a negative value or 0)

Review comment (Contributor): Fixed

}

void uploadFloat(
NBL_REF_ARG(ValueAccessor) val_accessor,
uint32_t index,
Review comment (Member Author): don't give bad names to variables, this needs to be called workGroupIndex

Review comment (Member Author): this variable shouldn't even exist, because the MEAN meter didn't output to a different address per X/Y workgroup coordinate

Review comment (Contributor): Fixed

float_t val,
float_t minLog2,
float_t rangeLog2
Review comment on lines +63 to +64 (Member Author): should already be precomputed as members

)
{
uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
Review comment (Member Author): take the workgroup count and workgroup XYZ coordinate (or workgroup index) from outside (as function arguments), otherwise in the presence of solutions such as virtual workgroups or persistent threads this whole thing will fall apart
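
A minimal sketch of the suggested signature change (parameter names are illustrative): the caller supplies the workgroup count and index, so virtual-workgroup or persistent-thread dispatchers keep working without the function reading glsl::gl_NumWorkGroups() itself.

void uploadFloat(
    NBL_REF_ARG(ValueAccessor) val_accessor,
    uint32_t workGroupCount, // total workgroups in the metering dispatch, passed in by the caller
    uint32_t workGroupIndex, // flattened workgroup index, passed in by the caller
    float_t val
)
{
    // encode and atomically add as before, just without any gl_NumWorkGroups() reads in here
}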

uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
Review comment on lines +69 to +71 (Member Author): let's write some docs for this...

The val was produced by a workgroup reduction performed over values in the [MinLog2, MaxLog2] range, which means the scaledLogLuma (the variable that should hold (val - minLog2) * rangeLog2) is between 0 and WorkGroupSize.

This value is atomically added by N workgroups.

You now want to represent it in fixed point during the atomic add, but not be vulnerable to overflow; this means the worst case is adding N times WorkGroupSize.

This means we need to multiply by (2^32 - 1)/N precomputed as a float, or alternatively round N up to a PoT and see how many bits are left (512 workgroups means 9 bits, so 23 are left). To avoid rounding precision errors, the PoT method is chosen.

I have no clue where you're getting +SubgroupSizeLog2 from.

Also the value of (1 << fixedPointBitsLeft) - 1 must be precomputed in create and stored as a member.

It should be as easy as

const uint32_t scaledLumaLog2BitPattern = uint32_t((val - lumaMinLog2) * maxIncrement_over_lumaRangeLog2 + float_t(0.5));

where maxIncrement = (0x1u << (32u - uint32_t(ceil(log2(WorkGroupCount * WorkGroupSize))))) - 1;
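
A hypothetical sketch of that precomputation (member and parameter names are illustrative, not from the PR): the fixed-point scale is derived once in create() from the planned number of sampling invocations, so the per-workgroup encode becomes a single multiply-add.

static this_t create(float_t lumaMinLog2, float_t lumaMaxLog2, uint32_t samplingInvocationCount)
{
    this_t retval;
    // round the invocation count up to a PoT and give the remaining bits of the 32-bit counter to the fixed-point sum
    const uint32_t usedBits = uint32_t(ceil(log2(float32_t(samplingInvocationCount))));
    retval.maxIncrement = (0x1u << (32u - usedBits)) - 1u;
    retval.maxIncrement_over_lumaRangeLog2 = float_t(retval.maxIncrement) / (lumaMaxLog2 - lumaMinLog2);
    retval.lumaMinLog2 = lumaMinLog2;
    retval.lumaMaxLog2 = lumaMaxLog2;
    return retval;
}

// then, per workgroup, exactly the reviewer's one-liner:
// const uint32_t scaledLumaLog2BitPattern = uint32_t((val - lumaMinLog2) * maxIncrement_over_lumaRangeLog2 + float_t(0.5));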


val_accessor.atomicAdd(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern);
Review comment (Member Author): no, always the same address should be added to; if you wanted to stagger, then you should stagger based on a modulo of the workgroup index
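
A small illustration of the two options (StaggerCount and workGroupIndex are hypothetical names):

// MEAN meter: every workgroup adds to the same address
val_accessor.atomicAdd(0u, lumaSumBitPattern);
// or, if staggering contention is wanted, key the slot off the workgroup index (not the thread index)
val_accessor.atomicAdd(workGroupIndex & (StaggerCount - 1u), lumaSumBitPattern);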

}

float_t downloadFloat(
NBL_REF_ARG(ValueAccessor) val_accessor,
uint32_t index,
float_t minLog2,
float_t rangeLog2
)
{
float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1));
return luma / rangeLog2 + minLog2;
Review comment on lines +83 to +84 (Member Author): again, you're getting random floats based on a workgroup index which thankfully was always the same (rare case of two wrongs making a right).

Again, if you wanted to stagger, you should use the entire subgroup to load the values, then subgroup-reduce them.

Just converting to float_t is not the correct way to decode; you should divide by the maxIncrement.
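
A hypothetical decode sketch along those lines (illustrative; assumes the slot count equals the subgroup size, that the accessor stores uint32_t fixed-point values, and that maxIncrement is kept as a member as suggested above):

float_t downloadFloat(NBL_REF_ARG(ValueAccessor) val_accessor)
{
    // each subgroup invocation loads one slot, then the whole subgroup reduces them
    const uint32_t fixedPointSum = glsl::subgroupAdd(val_accessor.get(glsl::gl_SubgroupInvocationID()));
    // decode by dividing by maxIncrement (the fixed-point scale), not by a bare cast to float
    return float_t(fixedPointSum) / float_t(maxIncrement);
}

The caller would still remap the result into the [lumaMinLog2, lumaMaxLog2] range and normalize by the sample count, as the later comments point out.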

}

void sampleLuma(
NBL_CONST_REF_ARG(MeteringWindow) window,
NBL_REF_ARG(ValueAccessor) val,
NBL_REF_ARG(TexAccessor) tex,
NBL_REF_ARG(SharedAccessor) sdata,
float_t2 tileOffset,
Review comment (Member Author): why is tileOffset being provided from the outside? it's a byproduct of your workgroupID, and of workgroupSize-1 decoded as morton, +1 in each dimension
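
A hypothetical sketch of deriving it in place (assumes glsl::gl_WorkGroupID() is exposed by the same glsl_compat header as gl_NumWorkGroups()):

// tile size = morton decode of the last invocation index, +1 in each dimension
const uint32_t2 tileSize = uint32_t2(morton2d_decode_x(GroupSize - 1), morton2d_decode_y(GroupSize - 1)) + uint32_t2(1, 1);
// the tile offset is then just a byproduct of the workgroup's XY coordinate
const float32_t2 tileOffset = float32_t2(glsl::gl_WorkGroupID().xy * tileSize);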

float_t2 viewportSize
)
{
uint32_t tid = workgroup::SubgroupContiguousIndex();
uint32_t2 coord = {
morton2d_decode_x(tid),
morton2d_decode_y(tid)
};

float_t luma = 0.0f;
float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
luma = computeLumaLog2(window, tex, shiftedCoord);
Review comment (Member Author): luma should be called lumaLog2

Review comment (Contributor): Fixed

float_t lumaSum = reduction(luma, sdata);

if (tid == GroupSize - 1) {
Review comment (Member Author): it's somewhat semantically cleaner to pick the first invocation instead of the last, especially since it's a reduction you performed before

Review comment (Contributor): Fixed

uint32_t3 workgroupCount = glsl::gl_NumWorkGroups();
Review comment (Member Author): take the workgroup count and workgroup XYZ coordinate (or workgroup index) from outside (as function arguments), otherwise in the presence of solutions such as virtual workgroups or persistent threads this whole thing will fall apart

uint32_t workgroupIndex = (workgroupCount.x * workgroupCount.y * workgroupCount.z) / 64;
Review comment (Member Author): you're computing the wrong thing, every workgroup gets the same index 🤦

Also the original code was touching the same address with every workgroup for the MEAN meter mode
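
For reference, a flattened per-workgroup index would look like this (hypothetical sketch; gl_WorkGroupID() assumed available), though for the MEAN meter the comment above suggests a single shared address anyway:

const uint32_t3 wgID = glsl::gl_WorkGroupID();
const uint32_t workgroupIndex = (wgID.z * workgroupCount.y + wgID.y) * workgroupCount.x + wgID.x;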


uploadFloat(
val,
workgroupIndex,
lumaSum,
log2(lumaMinMax.x),
log2(lumaMinMax.y / lumaMinMax.x)
);
}
}

float_t gatherLuma(
NBL_REF_ARG(ValueAccessor) val
)
{
uint32_t tid = glsl::gl_SubgroupInvocationID();
float_t luma = glsl::subgroupAdd(
downloadFloat(
val,
tid,
log2(lumaMinMax.x),
log2(lumaMinMax.y / lumaMinMax.x)
)
);

uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();
Review comment on lines +131 to +132 (Member Author): you're supposed to normalize by the number of samples you took during the sampling step; your workGroupCount here is NOT that value, it's the number of workgroups you're exposing with

Review comment (Member Author): you must precompute the fixedPointBitsLeft in the create method (and it needs to know how many invocations you'll be running in the sample step)


return (luma / (1 << fixedPointBitsLeft)) / sampleCount;
}

float_t sampleCount;
Review comment on lines +134 to +137 (Member Author): you want to compute and store the reciprocal of sampleCount and of 1 << fixedPointBitsLeft

Review comment (Member Author): that was the purpose of the rcpFirstPassWGCount variable in the old GLSL
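
A minimal sketch of what those precomputed members could look like (illustrative names, assuming create() also learns the sample count):

float_t rcpSampleCount;     // 1.0 / sampleCount, precomputed in create()
float_t rcpFixedPointScale; // 1.0 / float_t(1u << fixedPointBitsLeft), precomputed in create()
// gatherLuma() then becomes two multiplies instead of two divisions:
// return luma * rcpFixedPointScale * rcpSampleCount;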

float_t2 lumaMinMax;
Review comment (Member Author): don't do weird things we used to do in GLSL (due to no scalar layout), have a separate variable for min and max

Review comment (Member Author): also you should have the min and max precomputed with log2 already applied
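
A tiny sketch of the member layout being asked for (hypothetical, not in the PR):

float_t lumaMinLog2; // log2 of the minimum luma, precomputed in create()
float_t lumaMaxLog2; // log2 of the maximum luma, precomputed in create()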

};

template<uint32_t GroupSize, uint16_t BinCount, typename HistogramAccessor, typename SharedAccessor, typename TexAccessor>
struct median_meter {
using int_t = typename SharedAccessor::type;
using float_t = float32_t;
using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;
using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;

static this_t create(float_t2 lumaMinMax, float_t sampleCount) {
this_t retval;
retval.lumaMinMax = lumaMinMax;
retval.sampleCount = sampleCount;
return retval;
}

int_t inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) {
return workgroup::inclusive_scan < plus < int_t >, GroupSize >::
template __call <SharedAccessor>(value, sdata);
}

float_t computeLuma(
NBL_CONST_REF_ARG(MeteringWindow) window,
NBL_REF_ARG(TexAccessor) tex,
float_t2 shiftedCoord
) {
float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
float_t3 color = tex.get(uvPos);
float_t luma = (float_t)TexAccessor::toXYZ(color);

return clamp(luma, lumaMinMax.x, lumaMinMax.y);
}

int_t float2Int(
float_t val,
float_t minLog2,
float_t rangeLog2
) {
uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
}

float_t int2Float(
int_t val,
float_t minLog2,
float_t rangeLog2
) {
return val / rangeLog2 + minLog2;
}

void sampleLuma(
NBL_CONST_REF_ARG(MeteringWindow) window,
NBL_REF_ARG(HistogramAccessor) histo,
NBL_REF_ARG(TexAccessor) tex,
NBL_REF_ARG(SharedAccessor) sdata,
float_t2 tileOffset,
float_t2 viewportSize
) {
uint32_t tid = workgroup::SubgroupContiguousIndex();

for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
sdata.set(vid, 0);
}

sdata.workgroupExecutionAndMemoryBarrier();

uint32_t2 coord = {
morton2d_decode_x(tid),
morton2d_decode_y(tid)
};

float_t luma = 0.0f;
float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
luma = computeLuma(window, tex, shiftedCoord);

float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount;
uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize);

sdata.atomicAdd(binIndex, float2Int(luma, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));

sdata.workgroupExecutionAndMemoryBarrier();

float_t histogram_value;
sdata.get(tid, histogram_value);

sdata.workgroupExecutionAndMemoryBarrier();

float_t sum = inclusive_scan(histogram_value, sdata);
histo.atomicAdd(tid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));

const bool is_last_wg_invocation = tid == (GroupSize - 1);
const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize;

for (int i = 1; i < RoundedBinCount; i++) {
uint32_t keyBucketStart = GroupSize * i;
uint32_t vid = tid + keyBucketStart;

// no if statement about the last iteration needed
if (is_last_wg_invocation) {
float_t beforeSum;
sdata.get(keyBucketStart, beforeSum);
sdata.set(keyBucketStart, beforeSum + sum);
}

// propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes
sdata.workgroupExecutionAndMemoryBarrier();

// no aliasing anymore
float_t atVid;
sdata.get(vid, atVid);
sum = inclusive_scan(atVid, sdata);
if (vid < BinCount) {
histo.atomicAdd(vid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
}
}
}

float_t gatherLuma(
NBL_REF_ARG(HistogramAccessor) histo,
NBL_REF_ARG(SharedAccessor) sdata
) {
uint32_t tid = workgroup::SubgroupContiguousIndex();

for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
sdata.set(
vid,
histo.get(vid & (BinCount - 1))
);
}

sdata.workgroupExecutionAndMemoryBarrier();

uint32_t percentile40, percentile60;
sdata.get(BinCount * 0.4, percentile40);
sdata.get(BinCount * 0.6, percentile60);

return (int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2;
}

float_t sampleCount;
float_t2 lumaMinMax;
};

}
}
}

#endif