diff --git a/Makefile b/Makefile index b6ff3ea58..0f23f75ec 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION) VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake" VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake" -# Regext to pass to catch2 to filter tests -FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps" +# These are the tests that don't work with swiftshader but can be run directly with vulkan +FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble" ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" @@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o VS_BUILD_TYPE ?= "Debug" # Run with multiprocessin / parallel build by default VS_CMAKE_EXTRA_FLAGS ?= "" -VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest +VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default vs_cmake: @@ -116,7 +116,7 @@ vs_cmake: -DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \ -DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \ -DKOMPUTE_OPT_INSTALL=1 \ - -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ -DKOMPUTE_OPT_BUILD_TESTS=1 \ -DKOMPUTE_OPT_BUILD_SHADERS=1 \ -DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \ diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 97385e4fc..8cbfd404f 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1246,23 +1246,29 @@ class Algorithm template void setPushConstants(const std::vector& pushConstants) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; - if (pushConstants.size() != this->mPushConstantsSize) { + if (totalSize != previousTotalSize) { throw std::runtime_error( fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstantsSize)); + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); } if (this->mPushConstantsData) { free(this->mPushConstantsData); } - uint32_t memorySize = sizeof(decltype(pushConstants.back())); - uint32_t size = pushConstants.size(); + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); - memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; this->mPushConstantsSize = size; } @@ -1675,8 +1681,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default destructor, which is in charge of destroying the algorithm @@ -1713,7 +1735,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index a59f34f75..9179cffbd 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -338,8 +338,8 @@ void Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { if (this->mPushConstantsSize) { - KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", - this->mPushConstantsSize); + KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}", + this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize); commandBuffer.pushConstants(*this->mPipelineLayout, vk::ShaderStageFlagBits::eCompute, diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 15bfc05c9..c6099ff85 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -5,18 +5,13 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); - - this->mAlgorithm = algorithm; - this->mPushConstants = pushConstants; -} - OpAlgoDispatch::~OpAlgoDispatch() { KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); + + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } } void @@ -35,8 +30,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) vk::PipelineStageFlagBits::eComputeShader); } - if (this->mPushConstants.size()) { - this->mAlgorithm->setPushConstants(this->mPushConstants); + if (this->mPushConstantsSize) { + this->mAlgorithm->setPushConstants( + this->mPushConstantsData, + this->mPushConstantsSize, + this->mPushConstantsDataTypeMemorySize); } this->mAlgorithm->recordBindCore(commandBuffer); diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 6bc49cef6..a0b2ba146 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -183,23 +183,29 @@ class Algorithm template void setPushConstants(const std::vector& pushConstants) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; - if (pushConstants.size() != this->mPushConstantsSize) { + if (totalSize != previousTotalSize) { throw std::runtime_error( fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstantsSize)); + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); } if (this->mPushConstantsData) { free(this->mPushConstantsData); } - uint32_t memorySize = sizeof(decltype(pushConstants.back())); - uint32_t size = pushConstants.size(); + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); - memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; this->mPushConstantsSize = size; } diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 600b6116c..48acd6014 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default destructor, which is in charge of destroying the algorithm @@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 83b3d3d83..6d32fccaf 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -137,3 +137,226 @@ TEST(TestPushConstants, TestConstantsWrongSize) } } } + +// TODO: Ensure different types are considered for push constants +// TEST(TestPushConstants, TestConstantsWrongType) +// { +// { +// std::string shader(R"( +// #version 450 +// layout(push_constant) uniform PushConstants { +// float x; +// float y; +// float z; +// } pcs; +// layout (local_size_x = 1) in; +// layout(set = 0, binding = 0) buffer a { float pa[]; }; +// void main() { +// pa[0] += pcs.x; +// pa[1] += pcs.y; +// pa[2] += pcs.z; +// })"); +// +// std::vector spirv = compileSource(shader); +// +// std::shared_ptr sq = nullptr; +// +// { +// kp::Manager mgr; +// +// std::shared_ptr> tensor = +// mgr.tensor({ 0, 0, 0 }); +// +// std::shared_ptr algo = mgr.algorithm( +// { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); +// +// sq = mgr.sequence()->record({ tensor }); +// +// EXPECT_THROW(sq->record( +// algo, std::vector{ 1, 2, 3 }), +// std::runtime_error); +// } +// } +// } + +TEST(TestPushConstants, TestConstantsMixedTypes) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + float x; + uint y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y - 2147483000; + pa[2] += pcs.z; + })"); + + struct Params{float x; uint32_t y; int32_t z;}; + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 15.32, 2147483650, 10 }}); + sq->eval(algo, std::vector{{ 30.32, 2147483650, -3 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 45.64, 1300, 7 })); + } + } +} + +TEST(TestPushConstants, TestConstantsInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + int x; + int y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ -1, -1, -1 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ -3, -3, -3 })); + } + } +} + +TEST(TestPushConstants, TestConstantsUnsignedInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + uint x; + uint y; + uint z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { uint pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 2147483650, 2147483650, 2147483650 }}); + sq->eval(algo, std::vector{{ 5, 5, 5 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2147483655, 2147483655, 2147483655 })); + } + } +} + +TEST(TestPushConstants, TestConstantsDouble) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + double x; + double y; + double z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { double pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 })); + } + } +}