Skip to content

Commit 4c73802

Browse files
author
Deano
committed
Embed kernel option for CL and Vulkan
Rename from FR_EMBED_KERNELS to RR_EMBED_KERNELS. Modified stringify.py to do embedding of CL and Vulkan shaders (at the same time if required). Kernel cache is now src\kernelcache\kernels_[cl | vk].h
1 parent 6f306c4 commit 4c73802

19 files changed

+8158
-4240
lines changed

App/AO/aorenderer.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ THE SOFTWARE.
3232
//#include "sobol.h"
3333
extern unsigned g_SobolMatrices[];
3434

35-
#ifdef FR_EMBED_KERNELS
35+
#ifdef RR_EMBED_KERNELS
3636
#include "./CL/cache/kernels.h"
3737
#endif
3838

@@ -104,7 +104,7 @@ namespace Baikal
104104
m_render_data->pp = CLWParallelPrimitives(m_context);
105105

106106
// Load kernels
107-
#ifndef FR_EMBED_KERNELS
107+
#ifndef RR_EMBED_KERNELS
108108
m_render_data->program = CLWProgram::CreateFromFile("../App/CL/integrator_ao.cl", m_context);
109109
#else
110110
m_render_data->program = CLWProgram::CreateFromSource(cl_app, std::strlen(cl_integrator_ao), context);

App/App.lua

+6-6
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ project "App"
1515

1616
if os.is("windows") then
1717
includedirs { "../3rdParty/glew/include", "../3rdParty/freeglut/include", "../3rdParty/oiio/include" }
18-
links {"RadeonRays", "freeglut", "glew"}
18+
links {"RadeonRays", "freeglut", "glew"}
1919

20-
configuration {"x32"}
21-
libdirs { "../3rdParty/glew/lib/x86", "../3rdParty/freeglut/lib/x86", "../3rdParty/embree/lib/x86", "../3rdParty/oiio/lib/x86" }
22-
configuration {"x64"}
23-
libdirs { "../3rdParty/glew/lib/x64", "../3rdParty/freeglut/lib/x64", "../3rdParty/embree/lib/x64", "../3rdParty/oiio/lib/x64"}
20+
configuration {"x32"}
21+
libdirs { "../3rdParty/glew/lib/x86", "../3rdParty/freeglut/lib/x86", "../3rdParty/embree/lib/x86", "../3rdParty/oiio/lib/x86" }
22+
configuration {"x64"}
23+
libdirs { "../3rdParty/glew/lib/x64", "../3rdParty/freeglut/lib/x64", "../3rdParty/embree/lib/x64", "../3rdParty/oiio/lib/x64"}
2424

25-
configuration {}
25+
configuration {}
2626

2727
configuration {"Debug"}
2828
links {"OpenImageIOD"}

App/PT/ptrenderer.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ THE SOFTWARE.
3131

3232
#include "sobol.h"
3333

34-
#ifdef FR_EMBED_KERNELS
34+
#ifdef RR_EMBED_KERNELS
3535
#include "./CL/cache/kernels.h"
3636
#endif
3737

@@ -116,7 +116,7 @@ namespace Baikal
116116
m_render_data->pp = CLWParallelPrimitives(m_context);
117117

118118
// Load kernels
119-
#ifndef FR_EMBED_KERNELS
119+
#ifndef RR_EMBED_KERNELS
120120
m_render_data->program = CLWProgram::CreateFromFile("../App/CL/integrator_pt.cl", m_context);
121121
#else
122122
m_render_data->program = CLWProgram::CreateFromSource(cl_app, std::strlen(cl_integrator_pt), context);

App/main.cpp

+72-72
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ THE SOFTWARE.
5454
#define _USE_MATH_DEFINES
5555
#include <math.h>
5656

57-
#ifdef FR_EMBED_KERNELS
57+
#ifdef RR_EMBED_KERNELS
5858
#include "./CL/cache/kernels.h"
5959
#endif
6060

@@ -605,92 +605,92 @@ void Update()
605605
for (int i = 0; i < g_cfgs.size(); ++i)
606606
{
607607
g_cfgs[i].renderer->SetNumBounces(numbnc);
608-
}*/
609-
}
610-
611-
if (g_num_samples == -1 || g_samplecount++ < g_num_samples)
612-
{
613-
g_cfgs[g_primary].renderer->Render(*g_scene.get());
614-
}
615-
616-
//if (std::chrono::duration_cast<std::chrono::seconds>(time - updatetime).count() > 1)
617-
//{
618-
for (int i = 0; i < g_cfgs.size(); ++i)
619-
{
620-
if (g_cfgs[i].type == ConfigManager::kPrimary)
621-
continue;
622-
623-
int desired = 1;
624-
if (std::atomic_compare_exchange_strong(&g_ctrl[i].newdata, &desired, 0))
625-
{
626-
{
627-
//std::unique_lock<std::mutex> lock(g_ctrl[i].datamutex);
628-
//std::cout << "Start updating acc buffer\n"; std::cout.flush();
629-
g_cfgs[g_primary].context.WriteBuffer(0, g_outputs[g_primary].copybuffer, &g_outputs[i].fdata[0], g_window_width * g_window_height);
630-
//std::cout << "Finished updating acc buffer\n"; std::cout.flush();
631-
}
632-
633-
CLWKernel acckernel = g_cfgs[g_primary].renderer->GetAccumulateKernel();
608+
}*/
609+
}
634610

635-
int argc = 0;
636-
acckernel.SetArg(argc++, g_outputs[g_primary].copybuffer);
637-
acckernel.SetArg(argc++, g_window_width * g_window_width);
638-
acckernel.SetArg(argc++, g_outputs[g_primary].output->data());
611+
if (g_num_samples == -1 || g_samplecount++ < g_num_samples)
612+
{
613+
g_cfgs[g_primary].renderer->Render(*g_scene.get());
614+
}
639615

640-
int globalsize = g_window_width * g_window_height;
641-
g_cfgs[g_primary].context.Launch1D(0, ((globalsize + 63) / 64) * 64, 64, acckernel);
642-
}
643-
}
616+
//if (std::chrono::duration_cast<std::chrono::seconds>(time - updatetime).count() > 1)
617+
//{
618+
for (int i = 0; i < g_cfgs.size(); ++i)
619+
{
620+
if (g_cfgs[i].type == ConfigManager::kPrimary)
621+
continue;
622+
623+
int desired = 1;
624+
if (std::atomic_compare_exchange_strong(&g_ctrl[i].newdata, &desired, 0))
625+
{
626+
{
627+
//std::unique_lock<std::mutex> lock(g_ctrl[i].datamutex);
628+
//std::cout << "Start updating acc buffer\n"; std::cout.flush();
629+
g_cfgs[g_primary].context.WriteBuffer(0, g_outputs[g_primary].copybuffer, &g_outputs[i].fdata[0], g_window_width * g_window_height);
630+
//std::cout << "Finished updating acc buffer\n"; std::cout.flush();
631+
}
632+
633+
CLWKernel acckernel = g_cfgs[g_primary].renderer->GetAccumulateKernel();
634+
635+
int argc = 0;
636+
acckernel.SetArg(argc++, g_outputs[g_primary].copybuffer);
637+
acckernel.SetArg(argc++, g_window_width * g_window_width);
638+
acckernel.SetArg(argc++, g_outputs[g_primary].output->data());
639+
640+
int globalsize = g_window_width * g_window_height;
641+
g_cfgs[g_primary].context.Launch1D(0, ((globalsize + 63) / 64) * 64, 64, acckernel);
642+
}
643+
}
644644

645-
//updatetime = time;
646-
//}
645+
//updatetime = time;
646+
//}
647647

648-
if (!g_interop)
649-
{
650-
g_outputs[g_primary].output->GetData(&g_outputs[g_primary].fdata[0]);
648+
if (!g_interop)
649+
{
650+
g_outputs[g_primary].output->GetData(&g_outputs[g_primary].fdata[0]);
651651

652-
float gamma = 2.2f;
653-
for (int i = 0; i < (int)g_outputs[g_primary].fdata.size(); ++i)
654-
{
655-
g_outputs[g_primary].udata[4 * i] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].x / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
656-
g_outputs[g_primary].udata[4 * i + 1] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].y / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
657-
g_outputs[g_primary].udata[4 * i + 2] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].z / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
658-
g_outputs[g_primary].udata[4 * i + 3] = 1;
659-
}
652+
float gamma = 2.2f;
653+
for (int i = 0; i < (int)g_outputs[g_primary].fdata.size(); ++i)
654+
{
655+
g_outputs[g_primary].udata[4 * i] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].x / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
656+
g_outputs[g_primary].udata[4 * i + 1] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].y / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
657+
g_outputs[g_primary].udata[4 * i + 2] = (unsigned char)clamp(clamp(pow(g_outputs[g_primary].fdata[i].z / g_outputs[g_primary].fdata[i].w, 1.f / gamma), 0.f, 1.f) * 255, 0, 255);
658+
g_outputs[g_primary].udata[4 * i + 3] = 1;
659+
}
660660

661661

662-
glActiveTexture(GL_TEXTURE0);
662+
glActiveTexture(GL_TEXTURE0);
663663

664-
glBindTexture(GL_TEXTURE_2D, g_texture);
664+
glBindTexture(GL_TEXTURE_2D, g_texture);
665665

666-
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_outputs[g_primary].output->width(), g_outputs[g_primary].output->height(), GL_RGBA, GL_UNSIGNED_BYTE, &g_outputs[g_primary].udata[0]);
666+
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_outputs[g_primary].output->width(), g_outputs[g_primary].output->height(), GL_RGBA, GL_UNSIGNED_BYTE, &g_outputs[g_primary].udata[0]);
667667

668-
glBindTexture(GL_TEXTURE_2D, 0);
669-
}
670-
else
671-
{
672-
std::vector<cl_mem> objects;
673-
objects.push_back(g_cl_interop_image);
674-
g_cfgs[g_primary].context.AcquireGLObjects(0, objects);
668+
glBindTexture(GL_TEXTURE_2D, 0);
669+
}
670+
else
671+
{
672+
std::vector<cl_mem> objects;
673+
objects.push_back(g_cl_interop_image);
674+
g_cfgs[g_primary].context.AcquireGLObjects(0, objects);
675675

676-
CLWKernel copykernel = g_cfgs[g_primary].renderer->GetCopyKernel();
676+
CLWKernel copykernel = g_cfgs[g_primary].renderer->GetCopyKernel();
677677

678-
int argc = 0;
679-
copykernel.SetArg(argc++, g_outputs[g_primary].output->data());
680-
copykernel.SetArg(argc++, g_outputs[g_primary].output->width());
681-
copykernel.SetArg(argc++, g_outputs[g_primary].output->height());
682-
copykernel.SetArg(argc++, 2.2f);
683-
copykernel.SetArg(argc++, g_cl_interop_image);
678+
int argc = 0;
679+
copykernel.SetArg(argc++, g_outputs[g_primary].output->data());
680+
copykernel.SetArg(argc++, g_outputs[g_primary].output->width());
681+
copykernel.SetArg(argc++, g_outputs[g_primary].output->height());
682+
copykernel.SetArg(argc++, 2.2f);
683+
copykernel.SetArg(argc++, g_cl_interop_image);
684684

685-
int globalsize = g_outputs[g_primary].output->width() * g_outputs[g_primary].output->height();
686-
g_cfgs[g_primary].context.Launch1D(0, ((globalsize + 63) / 64) * 64, 64, copykernel);
685+
int globalsize = g_outputs[g_primary].output->width() * g_outputs[g_primary].output->height();
686+
g_cfgs[g_primary].context.Launch1D(0, ((globalsize + 63) / 64) * 64, 64, copykernel);
687687

688-
g_cfgs[g_primary].context.ReleaseGLObjects(0, objects);
689-
g_cfgs[g_primary].context.Finish(0);
690-
}
691-
//}
688+
g_cfgs[g_primary].context.ReleaseGLObjects(0, objects);
689+
g_cfgs[g_primary].context.Finish(0);
690+
}
691+
//}
692692

693-
glutPostRedisplay();
693+
glutPostRedisplay();
694694
}
695695

696696
void RenderThread(ControlData& cd)

CLW/CL/cache/kernels.h

-1
This file was deleted.

CLW/CLW.lua

+8-6
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@ project "CLW"
1212
end
1313
end
1414

15+
configuration {}
16+
17+
-- we rely on RadeonRays to do the actual embedding for us
1518
if _OPTIONS["embed_kernels"] then
16-
configuration {}
17-
defines {"FR_EMBED_KERNELS"}
18-
os.execute("python ../Tools/scripts/stringify.py ./CL/ > ./CL/cache/kernels.h")
19-
print ">> CLW: CL kernels embedded"
19+
defines {"RR_EMBED_KERNELS=1"}
20+
21+
-- there is no CLW version for vulkan (yet at least)
2022
end
21-
22-
configuration {"x32", "Debug"}
23+
24+
configuration {"x32", "Debug"}
2325
targetdir "../Bin/Debug/x86"
2426
configuration {"x64", "Debug"}
2527
targetdir "../Bin/Debug/x64"

CLW/CLWParallelPrimitives.cpp

+10-4
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,16 @@ THE SOFTWARE.
2828
#include <algorithm>
2929
#include <cstring>
3030

31-
#ifdef FR_EMBED_KERNELS
32-
#include "./CL/cache/kernels.h"
31+
#ifdef RR_EMBED_KERNELS
32+
#if USE_OPENCL
33+
# include <RadeonRays/src/kernelcache/kernels_cl.h>
3334
#endif
3435

36+
#if USE_VULKAN
37+
# include <RadeonRays/src/kernelcache/kernels_vk.h>
38+
#endif
39+
#endif // RR_EMBED_KERNELS
40+
3541
#define WG_SIZE 64
3642
#define NUM_SCAN_ELEMS_PER_WI 8
3743
#define NUM_SEG_SCAN_ELEMS_PER_WI 1
@@ -41,10 +47,10 @@ THE SOFTWARE.
4147
CLWParallelPrimitives::CLWParallelPrimitives(CLWContext context)
4248
: context_(context)
4349
{
44-
#ifndef FR_EMBED_KERNELS
50+
#ifndef RR_EMBED_KERNELS
4551
program_ = CLWProgram::CreateFromFile("../CLW/CL/CLW.cl", context_);
4652
#else
47-
program_ = CLWProgram::CreateFromSource(cl_CLW, std::strlen(cl_CLW), context_);
53+
program_ = CLWProgram::CreateFromSource(g_CLW_opencl, std::strlen(g_CLW_opencl), context_);
4854
#endif
4955
}
5056

CLW/CLWPlatform.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ void CLWPlatform::CreateAllPlatforms(std::vector<CLWPlatform>& platforms)
7676
continue;
7777
}
7878

79+
status = clGetPlatformInfo(platformIds[i], CL_PLATFORM_NAME, 0, nullptr, &size);
80+
81+
std::vector<char> name(size);
82+
83+
status = clGetPlatformInfo(platformIds[i], CL_PLATFORM_NAME, size, &name[0], 0);
84+
85+
std::cout << name << std::endl;
86+
7987
validIds.push_back(platformIds[i]);
8088
}
8189

RadeonRays/RadeonRays.lua

+22-4
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,28 @@ project "RadeonRays"
4343
configuration {}
4444

4545
if _OPTIONS["embed_kernels"] then
46-
defines {"FR_EMBED_KERNELS"}
47-
os.execute("python ../Tools/scripts/stringify.py ./src/kernel/CL/ > ./src/kernel/CL/cache/kernels.h")
48-
print ">> RadeonRays: CL kernels embedded"
49-
end
46+
defines {"RR_EMBED_KERNELS=1"}
5047

48+
if _OPTIONS["use_vulkan"] then
49+
os.execute( "python ../Tools/scripts/stringify.py " ..
50+
os.getcwd() .. "../Resources/kernels/GLSL/ " ..
51+
".comp " ..
52+
"vulkan " ..
53+
"> ./src/kernelcache/kernels_vk.h"
54+
)
55+
print ">> RadeonRays: VK kernels embedded"
56+
end
57+
58+
if _OPTIONS["use_opencl"] then
59+
os.execute( "python ../Tools/scripts/stringify.py " ..
60+
os.getcwd() .. "../Resources/kernels/CL/ " ..
61+
".cl " ..
62+
"opencl " ..
63+
"> ./src/kernelcache/kernels_cl.h"
64+
)
65+
print ">> RadeonRays: CL kernels embedded"
66+
end
67+
end
5168

5269
if _OPTIONS["use_tbb"] then
5370
defines {"USE_TBB"}
@@ -112,3 +129,4 @@ project "RadeonRays"
112129
targetdir "../Bin/Release/x64"
113130
configuration {}
114131

132+

RadeonRays/src/accelerator/hlbvh.cpp

+4-9
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ THE SOFTWARE.
2424
#include "primitives.h"
2525
#include "executable.h"
2626
#include "../except/except.h"
27+
#include "../strategy/strategy.h"
2728
#include "calc.h"
2829
#include "event.h"
2930

@@ -34,13 +35,6 @@ THE SOFTWARE.
3435
#include <iostream>
3536
#include <assert.h>
3637

37-
38-
39-
40-
#ifdef FR_EMBED_KERNELS
41-
#include "../kernel/CL/cache/kernels.h"
42-
#endif
43-
4438
#define INITIAL_TRIANGLE_CAPACITY 100000
4539

4640
namespace RadeonRays
@@ -86,7 +80,7 @@ namespace RadeonRays
8680

8781
void Hlbvh::InitGpuData()
8882
{
89-
#ifndef FR_EMBED_KERNELS
83+
#ifndef RR_EMBED_KERNELS
9084
if ( m_device->GetPlatform() == Calc::Platform::kOpenCL )
9185
{
9286
m_gpudata->executable = m_device->CompileExecutable( "kernels/CL/hlbvh_build.cl", nullptr, 0 );
@@ -98,7 +92,8 @@ namespace RadeonRays
9892
m_gpudata->executable = m_device->CompileExecutable( "kernels/GLSL/hlbvh_build.comp", nullptr, 0 );
9993
}
10094
#else
101-
m_gpudata->executable = m_device->CompileExecutable(cl_hlbvh_build, std::strlen(cl_hlbvh_build), "");
95+
auto& device = m_device;
96+
RR_GetEmbeddedKernel(hlbvh)
10297
#endif
10398
m_gpudata->morton_code_func = m_gpudata->executable->CreateFunction("CalcMortonCode");
10499
m_gpudata->build_func = m_gpudata->executable->CreateFunction("BuildHierarchy");

0 commit comments

Comments
 (0)