Improved Scan #855

Open · wants to merge 22 commits into master · changes shown from 11 commits
@@ -9,6 +9,7 @@

#include "nbl/builtin/hlsl/subgroup/basic.hlsl"
#include "nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl"
#include "nbl/builtin/hlsl/concepts.hlsl"


namespace nbl
17 changes: 17 additions & 0 deletions include/nbl/builtin/hlsl/subgroup/ballot.hlsl
@@ -37,6 +37,23 @@ uint32_t ElectedSubgroupInvocationID() {
return glsl::subgroupBroadcastFirst<uint32_t>(glsl::gl_SubgroupInvocationID());
}

template<uint32_t SubgroupSizeLog2>
struct Configuration
{
using mask_t = conditional_t<SubgroupSizeLog2 < 7, conditional_t<SubgroupSizeLog2 < 6, uint32_t1, uint32_t2>, uint32_t4>;

NBL_CONSTEXPR_STATIC_INLINE uint16_t Size = 0x1u << SubgroupSizeLog2;
};

template<class T>
struct is_configuration : bool_constant<false> {};

template<uint32_t N>
struct is_configuration<Configuration<N> > : bool_constant<true> {};

template<typename T>
NBL_CONSTEXPR bool is_configuration_v = is_configuration<T>::value;

}
}
}
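For orientation, a hedged usage sketch (not part of the diff, alias names are illustrative) of how the new Configuration and is_configuration_v trait are meant to be consumed:

// 32-wide subgroup: SubgroupSizeLog2 = 5, so Size == 32 and mask_t == uint32_t1 (one word covers the ballot)
using config32_t = nbl::hlsl::subgroup::Configuration<5>;
// 64-wide subgroup: SubgroupSizeLog2 = 6, so Size == 64 and mask_t == uint32_t2
using config64_t = nbl::hlsl::subgroup::Configuration<6>;
// the trait exists so templated code can constrain itself to real configurations,
// e.g. via NBL_PRIMARY_REQUIRES(subgroup::is_configuration_v<Config>) as the next file does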
45 changes: 45 additions & 0 deletions include/nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl
@@ -0,0 +1,45 @@
// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_BUILTIN_HLSL_SUBGROUP2_ARITHMETIC_PORTABILITY_INCLUDED_
#define _NBL_BUILTIN_HLSL_SUBGROUP2_ARITHMETIC_PORTABILITY_INCLUDED_


#include "nbl/builtin/hlsl/device_capabilities_traits.hlsl"

#include "nbl/builtin/hlsl/subgroup/basic.hlsl"
#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability_impl.hlsl"
#include "nbl/builtin/hlsl/concepts.hlsl"


namespace nbl
{
namespace hlsl
{
namespace subgroup2
{

template<typename Config, class BinOp, int32_t _ItemsPerInvocation=1, class device_capabilities=void NBL_PRIMARY_REQUIRES(subgroup::is_configuration_v<Config>)
struct ArithmeticParams
{
using config_t = Config;
using binop_t = BinOp;
using scalar_t = typename BinOp::type_t; // BinOp is expected to operate on the scalar type
using type_t = conditional_t<_ItemsPerInvocation<2, scalar_t, vector<scalar_t, _ItemsPerInvocation> >;

NBL_CONSTEXPR_STATIC_INLINE int32_t ItemsPerInvocation = _ItemsPerInvocation;
NBL_CONSTEXPR_STATIC_INLINE bool UseNativeIntrinsics = device_capabilities_traits<device_capabilities>::shaderSubgroupArithmetic /*&& some heuristic for when it's faster*/;
};

template<typename Params>
struct reduction : impl::reduction<typename Params::binop_t,typename Params::type_t,Params::ItemsPerInvocation,Params::UseNativeIntrinsics> {};
template<typename Params>
struct inclusive_scan : impl::inclusive_scan<typename Params::binop_t,typename Params::type_t,Params::ItemsPerInvocation,Params::UseNativeIntrinsics> {};
template<typename Params>
struct exclusive_scan : impl::exclusive_scan<typename Params::binop_t,typename Params::type_t,Params::ItemsPerInvocation,Params::UseNativeIntrinsics> {};

}
}
}

#endif
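A hedged usage sketch (illustrative only, not from the PR) of how ArithmeticParams composes with a binop; the plus functor is assumed to come from nbl/builtin/hlsl/functional.hlsl and the function name is hypothetical:

using config_t = nbl::hlsl::subgroup::Configuration<5>;          // 32-wide subgroup
using binop_t  = nbl::hlsl::plus<float32_t>;                     // scalar binop, type_t == float32_t
using params_t = nbl::hlsl::subgroup2::ArithmeticParams<config_t, binop_t, 2, void>; // 2 items per invocation

// params_t::type_t is float32_t2 because ItemsPerInvocation == 2
float32_t reduceTwoItems(float32_t2 value)
{
    nbl::hlsl::subgroup2::reduction<params_t> r;
    return r(value); // combines both items per invocation, then reduces across the subgroup
}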
144 changes: 144 additions & 0 deletions include/nbl/builtin/hlsl/subgroup2/arithmetic_portability_impl.hlsl
@@ -0,0 +1,144 @@
// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_BUILTIN_HLSL_SUBGROUP2_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
#define _NBL_BUILTIN_HLSL_SUBGROUP2_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_

#include "nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl"

namespace nbl
{
namespace hlsl
{
namespace subgroup2
{

namespace impl
{

template<class Binop, typename T, uint32_t ItemsPerInvocation, bool native>
struct inclusive_scan
{
using type_t = T;
using scalar_t = typename Binop::type_t;
using binop_t = Binop;
using exclusive_scan_op_t = subgroup::impl::exclusive_scan<binop_t, native>;

// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;

type_t operator()(NBL_CONST_REF_ARG(type_t) value)
{
binop_t binop;
type_t retval;
retval[0] = value[0];
//[unroll(ItemsPerInvocation-1)]
for (uint32_t i = 1; i < ItemsPerInvocation; i++)
retval[i] = binop(retval[i-1], value[i]);

exclusive_scan_op_t op;
scalar_t exclusive = op(retval[ItemsPerInvocation-1]);

//[unroll(ItemsPerInvocation)]
for (uint32_t i = 0; i < ItemsPerInvocation; i++)
retval[i] = binop(retval[i], exclusive);
Review comment (Member Author):
this only works if the subgroup invocations are not coalesced
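To make the comment concrete, a hedged illustration (not from the PR, helper names are hypothetical) of the two item layouts being contrasted:

// "blocked" layout assumed by the scan above: each invocation owns ItemsPerInvocation consecutive elements
uint32_t blockedIndex(uint32_t invocationID, uint32_t item, uint32_t itemsPerInvocation)
{
    return invocationID * itemsPerInvocation + item;
}
// "coalesced"/strided layout: consecutive invocations own consecutive elements, an invocation's items are SubgroupSize apart
uint32_t coalescedIndex(uint32_t invocationID, uint32_t item, uint32_t subgroupSize)
{
    return item * subgroupSize + invocationID;
}
// adding the exclusive total of the previous invocations to every item yields a correct scan
// only under the blocked layout, which is what the comment above points out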

return retval;
}
};

template<class Binop, typename T, uint32_t ItemsPerInvocation, bool native>
struct exclusive_scan
{
using type_t = T;
using scalar_t = typename Binop::type_t;
using binop_t = Binop;
using inclusive_scan_op_t = subgroup2::impl::inclusive_scan<binop_t, T, ItemsPerInvocation, native>;

// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;

type_t operator()(type_t value)
{
inclusive_scan_op_t op;
value = op(value);

type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
Review comment (Member Author):
yeah, if each invocation holds consecutive input and output elements, this shift becomes a mess (see that loop you have at the end)

also there was never a need to shuffle the entire vector, because you only ever used the last component

Review comment (Member Author):
if you do coalesced, then a plain subgroup shuffle on the vector and then a conditional set of the first element (a literal vectorized version of the old code) will achieve what you want:

const uint32_t invocationID = glsl::gl_SubgroupInvocationID();
// cyclic/modulo shuffle instead of relative needed
const type_t left = ItemsPerInvocation ? glsl::subgroupShuffle<type_t>(value,(invocationID-1)&SubgroupMask):glsl::subgroupShuffleUp<type_t>(value,1);
type_t newFirst; newFirst[0] = binop_t::identity;
[unroll]
for (uint32_t i=1; i<ItemsPerInvocation; i++)
   newFirst[i] = left[i-1];
return mix(newFirst,left,bool(glsl::gl_SubgroupInvocationID()));

P.S. also use mix(T,T,bool) instead of ?: because HLSL short circuiting turns ternaries into branches.

Review comment (Member Author):
btw the subgroupShuffle with a modulo SubgroupSize can be replaced with the new intrinsic from SPV_KHR_subgroup_rotate if you extend device_limits.json and so on (so that device_capabilities_traits gets it)
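A hedged sketch of that suggestion; it assumes a glsl::subgroupRotate wrapper over OpGroupNonUniformRotateKHR gets added (it does not exist in the tree as of this diff):

// subgroupRotate(value, delta) reads from invocation (gl_SubgroupInvocationID + delta) % SubgroupSize,
// so rotating by SubgroupSize-1 is the same (invocationID-1) & SubgroupMask access as the shuffle above,
// but maps to a single OpGroupNonUniformRotateKHR instead of a generic shuffle
const type_t left = glsl::subgroupRotate<type_t>(value, glsl::gl_SubgroupSize() - 1u);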


type_t retval;
retval[0] = bool(glsl::gl_SubgroupInvocationID()) ? left[ItemsPerInvocation-1] : binop_t::identity;
//[unroll(ItemsPerInvocation-1)]
for (uint32_t i = 1; i < ItemsPerInvocation; i++)
retval[i] = value[i-1];
return retval;
}
};

template<class Binop, typename T, uint32_t ItemsPerInvocation, bool native>
struct reduction
{
using type_t = T; // TODO? assert scalar_type<T> == scalar_t
using scalar_t = typename Binop::type_t;
using binop_t = Binop;
using op_t = subgroup::impl::reduction<binop_t, native>;

// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;

scalar_t operator()(NBL_CONST_REF_ARG(type_t) value)
{
binop_t binop;
op_t op;
scalar_t retval = value[0];
//[unroll(ItemsPerInvocation-1)]
for (uint32_t i = 1; i < ItemsPerInvocation; i++)
retval = binop(retval, value[i]);
return op(retval);
}
};


// spec for N=1 uses subgroup funcs
template<class Binop, typename T, bool native>
struct inclusive_scan<Binop, T, 1, native>
{
using binop_t = Binop;
using op_t = subgroup::impl::inclusive_scan<binop_t, native>;
// assert T == scalar type, binop::type == T

T operator()(NBL_CONST_REF_ARG(T) value)
{
op_t op;
return op(value);
}
};

template<class Binop, typename T, bool native>
struct exclusive_scan<Binop, T, 1, native>
{
using binop_t = Binop;
using op_t = subgroup::impl::exclusive_scan<binop_t, native>;

T operator()(NBL_CONST_REF_ARG(T) value)
{
op_t op;
return op(value);
}
};

template<class Binop, typename T, bool native>
struct reduction<Binop, T, 1, native>
{
using binop_t = Binop;
using op_t = subgroup::impl::reduction<binop_t, native>;
Review comment (Member Author):
benchmark is invalid if you do stuff in terms of subgroup functions, because you are supposed to use the Params::Configuration::SizeLog2 to make sure your loops unroll, as opposed to the subgroup v1 loops which can't unroll because the loop bound depends on gl_SubgroupSize, which is a uniform and not a compile time constant (you can only hope that the IHV compiler is not dumb and actually uses the subgroup size you provide in pipeline creation parameters when lowering SPIR-V to ISA)

TL;DR there can be no dependency between subgroup2 and the subgroup namespace, copy the code over
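A hedged sketch of the kind of unrollable loop the comment asks for (illustrative only; it assumes Configuration exposes a SizeLog2 constant as the comment references, and uses a Hillis-Steele shuffle scan as the example):

// with a compile-time SizeLog2 the trip count is a constant, so [unroll] can actually unroll
template<class Config, class Binop, typename T>
T inclusiveScanUnrolled(T value)
{
    Binop binop;
    [unroll]
    for (uint32_t i = 0; i < Config::SizeLog2; i++)
    {
        const uint32_t step = 0x1u << i;
        const T other = glsl::subgroupShuffleUp<T>(value, step);
        if (glsl::gl_SubgroupInvocationID() >= step)
            value = binop(value, other);
    }
    return value;
}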


T operator()(NBL_CONST_REF_ARG(T) value)
{
op_t op;
return op(value);
}
};

}

}
}
}

#endif