Added tests for push constants of all and mixed types

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
2021-09-12 11:31:32 +01:00 · 2021-09-12 11:31:32 +01:00 · 858a70d9b8
commit 858a70d9b8
parent c23573eb47
7 changed files with 304 additions and 35 deletions
--- a/8
+++ b/8
@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION)
 VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake"
 VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake"

-# Regext to pass to catch2 to filter tests
-FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps"
+# These are the tests that don't work with swiftshader but can be run directly with vulkan
+FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble"

 ifeq ($(OS),Windows_NT)     # is Windows_NT on XP, 2000, 7, Vista, 10...
 	CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"
@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o
 VS_BUILD_TYPE ?= "Debug"
 # Run with multiprocessin / parallel build by default
 VS_CMAKE_EXTRA_FLAGS ?= ""
-VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
+VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
 VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default

 vs_cmake:
@ -116,7 +116,7 @@ vs_cmake:
 		-DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \
 		-DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \
 		-DKOMPUTE_OPT_INSTALL=1 \
-		-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \
+		-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
 		-DKOMPUTE_OPT_BUILD_TESTS=1 \
 		-DKOMPUTE_OPT_BUILD_SHADERS=1 \
 		-DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -1246,23 +1246,29 @@ class Algorithm
    template<typename T>
    void setPushConstants(const std::vector<T>& pushConstants)
    {
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = memorySize * size;
+        uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;

-        if (pushConstants.size() != this->mPushConstantsSize) {
+        if (totalSize != previousTotalSize) {
            throw std::runtime_error(
              fmt::format("Kompute Algorithm push "
-                          "constant provided is size {} but expected size {}",
-                          pushConstants.size(),
-                          this->mPushConstantsSize));
+                          "constant total memory size provided is {} but expected {} bytes",
+                          totalSize,
+                          previousTotalSize));
        }
        if (this->mPushConstantsData) {
            free(this->mPushConstantsData);
        }

-        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
-        uint32_t size = pushConstants.size();
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
        uint32_t totalSize = size * memorySize;
        this->mPushConstantsData = malloc(totalSize);
-        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        memcpy(this->mPushConstantsData, data, totalSize);
        this->mPushConstantsDataTypeMemorySize = memorySize;
        this->mPushConstantsSize = size;
    }
@ -1675,8 +1681,24 @@ class OpAlgoDispatch : public OpBase
     * @param algorithm The algorithm object to use for dispatch
     * @param pushConstants The push constants to use for override
     */
+    template<typename T = float>
    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-            const kp::Constants& pushConstants = {});
+            const std::vector<T>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
+
+        this->mAlgorithm = algorithm;
+
+        if (pushConstants.size()) {
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+    }

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -1713,7 +1735,9 @@ class OpAlgoDispatch : public OpBase
 private:
    // -------------- ALWAYS OWNED RESOURCES
    std::shared_ptr<Algorithm> mAlgorithm;
-    Constants mPushConstants;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
 };

 } // End namespace kp
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@ -338,8 +338,8 @@ void
 Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
 {
    if (this->mPushConstantsSize) {
-        KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
-                     this->mPushConstantsSize);
+        KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
+                     this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize);

        commandBuffer.pushConstants(*this->mPipelineLayout,
                                    vk::ShaderStageFlagBits::eCompute,
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@ -5,18 +5,13 @@

 namespace kp {

-OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-                               const kp::Constants& pushConstants)
-{
-    KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
-
-    this->mAlgorithm = algorithm;
-    this->mPushConstants = pushConstants;
-}
-
 OpAlgoDispatch::~OpAlgoDispatch()
 {
    KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
+
+    if (this->mPushConstantsData) {
+        free(this->mPushConstantsData);
+    }
 }

 void
@ -35,8 +30,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
          vk::PipelineStageFlagBits::eComputeShader);
    }

-    if (this->mPushConstants.size()) {
-        this->mAlgorithm->setPushConstants(this->mPushConstants);
+    if (this->mPushConstantsSize) {
+        this->mAlgorithm->setPushConstants(
+                this->mPushConstantsData,
+                this->mPushConstantsSize,
+                this->mPushConstantsDataTypeMemorySize);
    }

    this->mAlgorithm->recordBindCore(commandBuffer);
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@ -183,23 +183,29 @@ class Algorithm
    template<typename T>
    void setPushConstants(const std::vector<T>& pushConstants)
    {
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = memorySize * size;
+        uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;

-        if (pushConstants.size() != this->mPushConstantsSize) {
+        if (totalSize != previousTotalSize) {
            throw std::runtime_error(
              fmt::format("Kompute Algorithm push "
-                          "constant provided is size {} but expected size {}",
-                          pushConstants.size(),
-                          this->mPushConstantsSize));
+                          "constant total memory size provided is {} but expected {} bytes",
+                          totalSize,
+                          previousTotalSize));
        }
        if (this->mPushConstantsData) {
            free(this->mPushConstantsData);
        }

-        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
-        uint32_t size = pushConstants.size();
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
        uint32_t totalSize = size * memorySize;
        this->mPushConstantsData = malloc(totalSize);
-        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        memcpy(this->mPushConstantsData, data, totalSize);
        this->mPushConstantsDataTypeMemorySize = memorySize;
        this->mPushConstantsSize = size;
    }
--- a/src/include/kompute/operations/OpAlgoDispatch.hpp
+++ b/src/include/kompute/operations/OpAlgoDispatch.hpp
@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase
     * @param algorithm The algorithm object to use for dispatch
     * @param pushConstants The push constants to use for override
     */
+    template<typename T = float>
    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-            const kp::Constants& pushConstants = {});
+            const std::vector<T>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
+
+        this->mAlgorithm = algorithm;
+
+        if (pushConstants.size()) {
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+    }

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase
 private:
    // -------------- ALWAYS OWNED RESOURCES
    std::shared_ptr<Algorithm> mAlgorithm;
-    Constants mPushConstants;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
 };

 } // End namespace kp
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@ -137,3 +137,226 @@ TEST(TestPushConstants, TestConstantsWrongSize)
        }
    }
 }
+
+// TODO: Ensure different types are considered for push constants
+// TEST(TestPushConstants, TestConstantsWrongType)
+// {
+//     {
+//         std::string shader(R"(
+//           #version 450
+//           layout(push_constant) uniform PushConstants {
+//             float x;
+//             float y;
+//             float z;
+//           } pcs;
+//           layout (local_size_x = 1) in;
+//           layout(set = 0, binding = 0) buffer a { float pa[]; };
+//           void main() {
+//               pa[0] += pcs.x;
+//               pa[1] += pcs.y;
+//               pa[2] += pcs.z;
+//           })");
+// 
+//         std::vector<uint32_t> spirv = compileSource(shader);
+// 
+//         std::shared_ptr<kp::Sequence> sq = nullptr;
+// 
+//         {
+//             kp::Manager mgr;
+// 
+//             std::shared_ptr<kp::TensorT<float>> tensor =
+//               mgr.tensor({ 0, 0, 0 });
+// 
+//             std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
+//               { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
+// 
+//             sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
+// 
+//             EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
+//                            algo, std::vector<uint32_t>{ 1, 2, 3 }),
+//                          std::runtime_error);
+//         }
+//     }
+// }
+
+TEST(TestPushConstants, TestConstantsMixedTypes)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            float x;
+            uint y;
+            int z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { float pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y - 2147483000;
+              pa[2] += pcs.z;
+          })");
+
+        struct Params{float x; uint32_t y; int32_t z;};
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<float>> tensor =
+              mgr.tensorT<float>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<float, Params>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 15.32, 2147483650, 10 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 30.32, 2147483650, -3 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<float>({ 45.64, 1300, 7 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsInt)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            int x;
+            int y;
+            int z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { int pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<int32_t>> tensor =
+              mgr.tensorT<int32_t>({ -1, -1, -1 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<int32_t , int32_t>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<int32_t>({ -3, -3, -3 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsUnsignedInt)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            uint x;
+            uint y;
+            uint z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { uint pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<uint32_t>> tensor =
+              mgr.tensorT<uint32_t>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<uint32_t , uint32_t>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 2147483650, 2147483650, 2147483650 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 5, 5, 5 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<uint32_t>({ 2147483655, 2147483655, 2147483655 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsDouble)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            double x;
+            double y;
+            double z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { double pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<double>> tensor =
+              mgr.tensorT<double>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<double, double>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<double>({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 }));
+        }
+    }
+}