From 860fda9fb526d2350f550964b78644236b20482e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 09:37:50 +0100 Subject: [PATCH 01/19] Initil implementation Signed-off-by: Alejandro Saucedo --- single_include/kompute/Kompute.hpp | 120 ++++++++++++++++++++-- src/Algorithm.cpp | 114 +++++--------------- src/Manager.cpp | 1 + src/OpAlgoDispatch.cpp | 2 +- src/include/kompute/Algorithm.hpp | 118 +++++++++++++++++++-- src/include/kompute/operations/OpMult.hpp | 2 +- 6 files changed, 247 insertions(+), 110 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 12fe9cda9..a68cff1e6 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1094,12 +1094,30 @@ class Algorithm * these can be modified but all new values must have the same vector size * as this initial value. */ + template Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); + + this->mDevice = device; + + if (tensors.size() && spirv.size()) { + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " + "spirv size: {}", + tensors.size(), + spirv.size()); + this->rebuild( + tensors, spirv, workgroup, specializationConstants, pushConstants); + } else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " + "spirv so not rebuilding vulkan components"); + } + } /** * Rebuild function to reconstruct algorithm with configuration parameters @@ -1116,11 +1134,57 @@ class Algorithm * these can be modified but all new values must have the same vector size * as this initial value. */ + template void rebuild(const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm rebuild started"); + + this->mTensors = tensors; + this->mSpirv = spirv; + + if (specializationConstants.size()) { + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + } + uint32_t memorySize = sizeof(decltype(specializationConstants.back())); + uint32_t size = specializationConstants.size(); + uint32_t totalSize = size * memorySize; + this->mSpecializationConstantsData = malloc(totalSize); + memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize); + this->mSpecializationConstantsDataTypeMemorySize = memorySize; + this->mSpecializationConstantsSize = size; + } + + if (pushConstants.size()) { + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + + this->setWorkgroup(workgroup, + this->mTensors.size() ? this->mTensors[0]->size() : 1); + + // Descriptor pool is created first so if available then destroy all before + // rebuild + if (this->isInit()) { + this->destroy(); + } + + this->createParameters(); + this->createShaderModule(); + this->createPipeline(); + } /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -1179,7 +1243,29 @@ class Algorithm * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. 
*/ - void setPush(const Constants& pushConstants); + template + void setPushConstants(const std::vector& pushConstants) + { + + if (pushConstants.size() != this->mPushConstantsSize) { + throw std::runtime_error( + fmt::format("Kompute Algorithm push " + "constant provided is size {} but expected size {}", + pushConstants.size(), + this->mPushConstantsSize)); + } + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } /** * Gets the current workgroup from the algorithm. @@ -1194,13 +1280,23 @@ class Algorithm * * @returns The kp::Constants currently set for specialization constants */ - const Constants& getSpecializationConstants(); + template + const std::vector getSpecializationConstants() + { + return { (T*)this->mSpecializationConstantsData, + ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize }; + } /** * Gets the specialization constants of the current algorithm. * * @returns The kp::Constants currently set for push constants */ - const Constants& getPush(); + template + const std::vector getPushConstants() + { + return { (T*)this->mPushConstantsData, + ((T*)this->mPushConstantsData) + this->mPushConstantsSize }; + } /** * Gets the current tensors that are used in the algorithm. 
* @@ -1233,8 +1329,12 @@ class Algorithm // -------------- ALWAYS OWNED RESOURCES std::vector mSpirv; - Constants mSpecializationConstants; - Constants mPushConstants; + void* mSpecializationConstantsData = nullptr; + uint32_t mSpecializationConstantsDataTypeMemorySize = 0; + uint32_t mSpecializationConstantsSize = 0; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; Workgroup mWorkgroup; // Create util functions @@ -1655,7 +1755,7 @@ class OpMult : public OpAlgoDispatch (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); - algorithm->rebuild(tensors, spirv); + algorithm->rebuild<>(tensors, spirv); } /** diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 8d510bb9c..69ab5f7ad 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -5,30 +5,6 @@ namespace kp { -Algorithm::Algorithm(std::shared_ptr device, - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); - - this->mDevice = device; - - if (tensors.size() && spirv.size()) { - KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " - "spirv size: {}", - tensors.size(), - spirv.size()); - this->rebuild( - tensors, spirv, workgroup, specializationConstants, pushConstants); - } else { - KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " - "spirv so not rebuilding vulkan components"); - } -} - Algorithm::~Algorithm() { KP_LOG_DEBUG("Kompute Algorithm Destructor started"); @@ -36,33 +12,6 @@ Algorithm::~Algorithm() this->destroy(); } -void -Algorithm::rebuild(const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute Algorithm rebuild started"); - 
- this->mTensors = tensors; - this->mSpirv = spirv; - this->mSpecializationConstants = specializationConstants; - this->mPushConstants = pushConstants; - this->setWorkgroup(workgroup, - this->mTensors.size() ? this->mTensors[0]->size() : 1); - - // Descriptor pool is created first so if available then destroy all before - // rebuild - if (this->isInit()) { - this->destroy(); - } - - this->createParameters(); - this->createShaderModule(); - this->createPipeline(); -} - bool Algorithm::isInit() { @@ -74,6 +23,13 @@ Algorithm::isInit() void Algorithm::destroy() { + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + } if (!this->mDevice) { KP_LOG_WARN("Kompute Algorithm destroy function reached with null " @@ -279,10 +235,10 @@ Algorithm::createPipeline() this->mDescriptorSetLayout.get()); vk::PushConstantRange pushConstantRange; - if (this->mPushConstants.size()) { + if (this->mPushConstantsSize) { pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute); pushConstantRange.setOffset(0); - pushConstantRange.setSize(sizeof(float) * this->mPushConstants.size()); + pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize); pipelineLayoutInfo.setPushConstantRangeCount(1); pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange); @@ -295,11 +251,11 @@ Algorithm::createPipeline() std::vector specializationEntries; - for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) { + for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) { vk::SpecializationMapEntry specializationEntry( static_cast(i), - static_cast(sizeof(float) * i), - sizeof(float)); + static_cast(this->mSpecializationConstantsDataTypeMemorySize * i), + this->mSpecializationConstantsDataTypeMemorySize); specializationEntries.push_back(specializationEntry); } @@ -309,8 +265,8 @@ Algorithm::createPipeline() vk::SpecializationInfo 
specializationInfo( static_cast(specializationEntries.size()), specializationEntries.data(), - sizeof(float) * this->mSpecializationConstants.size(), - this->mSpecializationConstants.data()); + this->mSpecializationConstantsDataTypeMemorySize * this->mSpecializationConstantsSize, + this->mSpecializationConstantsData); vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), @@ -381,15 +337,22 @@ Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) void Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { - if (this->mPushConstants.size()) { + if (this->mPushConstantsSize) { KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", - this->mPushConstants.size()); + this->mPushConstantsSize); + KP_LOG_DEBUG("{} {}", + this->mPushConstantsDataTypeMemorySize, + this->mPushConstantsData == nullptr); + KP_LOG_DEBUG("{}", + ((float*)this->mPushConstantsData)[0]); commandBuffer.pushConstants(*this->mPipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, - this->mPushConstants.size() * sizeof(float), - this->mPushConstants.data()); + this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize, + this->mPushConstantsData); + KP_LOG_DEBUG("Constants bound: {}", + this->mPushConstantsSize); } } @@ -426,39 +389,12 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) this->mWorkgroup[2]); } -void -Algorithm::setPush(const Constants& pushConstants) -{ - - if (pushConstants.size() != this->mPushConstants.size()) { - throw std::runtime_error( - fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstants.size())); - } - - this->mPushConstants = pushConstants; -} - const Workgroup& Algorithm::getWorkgroup() { return this->mWorkgroup; } -const Constants& -Algorithm::getSpecializationConstants() -{ - return this->mSpecializationConstants; -} - -const Constants& -Algorithm::getPush() -{ - return this->mPushConstants; -} 
- const std::vector>& Algorithm::getTensors() { diff --git a/src/Manager.cpp b/src/Manager.cpp index 80f308983..8e8367c30 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -422,6 +422,7 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } +// TODO: Update to template std::shared_ptr Manager::algorithm(const std::vector>& tensors, const std::vector& spirv, diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 0fd323b7d..15bfc05c9 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -36,7 +36,7 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) } if (this->mPushConstants.size()) { - this->mAlgorithm->setPush(this->mPushConstants); + this->mAlgorithm->setPushConstants(this->mPushConstants); } this->mAlgorithm->recordBindCore(commandBuffer); diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 2ec2797a8..6bc49cef6 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -31,12 +31,30 @@ class Algorithm * these can be modified but all new values must have the same vector size * as this initial value. */ + template Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); + + this->mDevice = device; + + if (tensors.size() && spirv.size()) { + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " + "spirv size: {}", + tensors.size(), + spirv.size()); + this->rebuild( + tensors, spirv, workgroup, specializationConstants, pushConstants); + } else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " + "spirv so not rebuilding vulkan components"); + } + } /** * Rebuild function to reconstruct algorithm with configuration parameters @@ -53,11 +71,57 @@ class Algorithm * these can be modified but all new values must have the same vector size * as this initial value. */ + template void rebuild(const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector& specializationConstants = {}, + const std::vector

& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute Algorithm rebuild started"); + + this->mTensors = tensors; + this->mSpirv = spirv; + + if (specializationConstants.size()) { + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + } + uint32_t memorySize = sizeof(decltype(specializationConstants.back())); + uint32_t size = specializationConstants.size(); + uint32_t totalSize = size * memorySize; + this->mSpecializationConstantsData = malloc(totalSize); + memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize); + this->mSpecializationConstantsDataTypeMemorySize = memorySize; + this->mSpecializationConstantsSize = size; + } + + if (pushConstants.size()) { + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + + this->setWorkgroup(workgroup, + this->mTensors.size() ? this->mTensors[0]->size() : 1); + + // Descriptor pool is created first so if available then destroy all before + // rebuild + if (this->isInit()) { + this->destroy(); + } + + this->createParameters(); + this->createShaderModule(); + this->createPipeline(); + } /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -116,7 +180,29 @@ class Algorithm * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. 
*/ - void setPush(const Constants& pushConstants); + template + void setPushConstants(const std::vector& pushConstants) + { + + if (pushConstants.size() != this->mPushConstantsSize) { + throw std::runtime_error( + fmt::format("Kompute Algorithm push " + "constant provided is size {} but expected size {}", + pushConstants.size(), + this->mPushConstantsSize)); + } + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } + + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } /** * Gets the current workgroup from the algorithm. @@ -131,13 +217,23 @@ class Algorithm * * @returns The kp::Constants currently set for specialization constants */ - const Constants& getSpecializationConstants(); + template + const std::vector getSpecializationConstants() + { + return { (T*)this->mSpecializationConstantsData, + ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize }; + } /** * Gets the specialization constants of the current algorithm. * * @returns The kp::Constants currently set for push constants */ - const Constants& getPush(); + template + const std::vector getPushConstants() + { + return { (T*)this->mPushConstantsData, + ((T*)this->mPushConstantsData) + this->mPushConstantsSize }; + } /** * Gets the current tensors that are used in the algorithm. 
* @@ -170,8 +266,12 @@ class Algorithm // -------------- ALWAYS OWNED RESOURCES std::vector mSpirv; - Constants mSpecializationConstants; - Constants mPushConstants; + void* mSpecializationConstantsData = nullptr; + uint32_t mSpecializationConstantsDataTypeMemorySize = 0; + uint32_t mSpecializationConstantsSize = 0; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; Workgroup mWorkgroup; // Create util functions diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 97b29cad9..2d6b88057 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -45,7 +45,7 @@ class OpMult : public OpAlgoDispatch (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); - algorithm->rebuild(tensors, spirv); + algorithm->rebuild<>(tensors, spirv); } /** From 2e1275e085ad4ef2b61d3b4b77a199724e0f6d6b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 09:42:07 +0100 Subject: [PATCH 02/19] Cleanup of logging Signed-off-by: Alejandro Saucedo --- src/Algorithm.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 69ab5f7ad..a59f34f75 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -340,19 +340,12 @@ Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) if (this->mPushConstantsSize) { KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", this->mPushConstantsSize); - KP_LOG_DEBUG("{} {}", - this->mPushConstantsDataTypeMemorySize, - this->mPushConstantsData == nullptr); - KP_LOG_DEBUG("{}", - ((float*)this->mPushConstantsData)[0]); commandBuffer.pushConstants(*this->mPipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize, this->mPushConstantsData); - KP_LOG_DEBUG("Constants bound: {}", - 
this->mPushConstantsSize); } } From c23573eb47e4f9684baf9f3da4654a62c431c62e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 09:51:36 +0100 Subject: [PATCH 03/19] Added template function for algorithm on manager Signed-off-by: Alejandro Saucedo --- single_include/kompute/Kompute.hpp | 39 ++++++++++++++++++++++++++---- src/Manager.cpp | 26 -------------------- src/include/kompute/Manager.hpp | 39 ++++++++++++++++++++++++++---- 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index a68cff1e6..97385e4fc 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -2181,6 +2181,16 @@ class Manager return tensor; } + std::shared_ptr algorithm( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const std::vector& specializationConstants = {}, + const std::vector& pushConstants = {}) + { + return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. @@ -2195,12 +2205,31 @@ class Manager * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ + template std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const std::vector& specializationConstants, + const std::vector

& pushConstants) + { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; + + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } + + return algorithm; + } /** * Destroy the GPU resources and all managed resources by manager. diff --git a/src/Manager.cpp b/src/Manager.cpp index 8e8367c30..a553d667f 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -422,32 +422,6 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } -// TODO: Update to template -std::shared_ptr -Manager::algorithm(const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants, - const Constants& pushConstants) -{ - - KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - - std::shared_ptr algorithm{ new kp::Algorithm( - this->mDevice, - tensors, - spirv, - workgroup, - specializationConstants, - pushConstants) }; - - if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); - } - - return algorithm; -} - std::shared_ptr Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) { diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 62f98d6d5..8a4244a2b 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -123,6 +123,16 @@ class Manager return tensor; } + std::shared_ptr algorithm( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const std::vector& specializationConstants = {}, + const std::vector& pushConstants = {}) + { + return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed 
by its reference count going to zero. @@ -137,12 +147,31 @@ class Manager * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ + template std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}, - const Constants& pushConstants = {}); + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const std::vector& specializationConstants, + const std::vector

& pushConstants) + { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; + + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } + + return algorithm; + } /** * Destroy the GPU resources and all managed resources by manager. From 858a70d9b8bf387070fd6c94a3553d573e6fdd8e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 11:31:32 +0100 Subject: [PATCH 04/19] Added tests for push constants of all and mixed types Signed-off-by: Alejandro Saucedo --- Makefile | 8 +- single_include/kompute/Kompute.hpp | 42 +++- src/Algorithm.cpp | 4 +- src/OpAlgoDispatch.cpp | 20 +- src/include/kompute/Algorithm.hpp | 20 +- .../kompute/operations/OpAlgoDispatch.hpp | 22 +- test/TestPushConstant.cpp | 223 ++++++++++++++++++ 7 files changed, 304 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index b6ff3ea58..0f23f75ec 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION) VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake" VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake" -# Regext to pass to catch2 to filter tests -FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps" +# These are the tests that don't work with swiftshader but can be run directly with vulkan +FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble" ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... 
CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" @@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o VS_BUILD_TYPE ?= "Debug" # Run with multiprocessin / parallel build by default VS_CMAKE_EXTRA_FLAGS ?= "" -VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest +VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default vs_cmake: @@ -116,7 +116,7 @@ vs_cmake: -DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \ -DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \ -DKOMPUTE_OPT_INSTALL=1 \ - -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \ + -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \ -DKOMPUTE_OPT_BUILD_TESTS=1 \ -DKOMPUTE_OPT_BUILD_SHADERS=1 \ -DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \ diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 97385e4fc..8cbfd404f 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1246,23 +1246,29 @@ class Algorithm template void setPushConstants(const std::vector& pushConstants) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; - if (pushConstants.size() != this->mPushConstantsSize) { + if (totalSize != previousTotalSize) { throw std::runtime_error( fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstantsSize)); + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); } if (this->mPushConstantsData) { free(this->mPushConstantsData); } - uint32_t 
memorySize = sizeof(decltype(pushConstants.back())); - uint32_t size = pushConstants.size(); + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); - memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; this->mPushConstantsSize = size; } @@ -1675,8 +1681,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default destructor, which is in charge of destroying the algorithm @@ -1713,7 +1735,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index a59f34f75..9179cffbd 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -338,8 +338,8 @@ void Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { if (this->mPushConstantsSize) { - 
KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", - this->mPushConstantsSize); + KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}", + this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize); commandBuffer.pushConstants(*this->mPipelineLayout, vk::ShaderStageFlagBits::eCompute, diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 15bfc05c9..c6099ff85 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -5,18 +5,13 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants) -{ - KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); - - this->mAlgorithm = algorithm; - this->mPushConstants = pushConstants; -} - OpAlgoDispatch::~OpAlgoDispatch() { KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); + + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + } } void @@ -35,8 +30,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) vk::PipelineStageFlagBits::eComputeShader); } - if (this->mPushConstants.size()) { - this->mAlgorithm->setPushConstants(this->mPushConstants); + if (this->mPushConstantsSize) { + this->mAlgorithm->setPushConstants( + this->mPushConstantsData, + this->mPushConstantsSize, + this->mPushConstantsDataTypeMemorySize); } this->mAlgorithm->recordBindCore(commandBuffer); diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 6bc49cef6..a0b2ba146 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -183,23 +183,29 @@ class Algorithm template void setPushConstants(const std::vector& pushConstants) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = memorySize * size; + uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; - if (pushConstants.size() != this->mPushConstantsSize) { + if (totalSize != 
previousTotalSize) { throw std::runtime_error( fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), - this->mPushConstantsSize)); + "constant total memory size provided is {} but expected {} bytes", + totalSize, + previousTotalSize)); } if (this->mPushConstantsData) { free(this->mPushConstantsData); } - uint32_t memorySize = sizeof(decltype(pushConstants.back())); - uint32_t size = pushConstants.size(); + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); - memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; this->mPushConstantsSize = size; } diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 600b6116c..48acd6014 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase * @param algorithm The algorithm object to use for dispatch * @param pushConstants The push constants to use for override */ + template OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants = {}); + const std::vector& pushConstants = {}) + { + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mAlgorithm = algorithm; + + if (pushConstants.size()) { + uint32_t memorySize = sizeof(decltype(pushConstants.back())); + uint32_t size = pushConstants.size(); + uint32_t totalSize = size * memorySize; + this->mPushConstantsData = malloc(totalSize); + memcpy(this->mPushConstantsData, pushConstants.data(), totalSize); + this->mPushConstantsDataTypeMemorySize = memorySize; + this->mPushConstantsSize = size; + } + } /** * Default 
destructor, which is in charge of destroying the algorithm @@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES std::shared_ptr mAlgorithm; - Constants mPushConstants; + void* mPushConstantsData = nullptr; + uint32_t mPushConstantsDataTypeMemorySize = 0; + uint32_t mPushConstantsSize = 0; }; } // End namespace kp diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 83b3d3d83..6d32fccaf 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -137,3 +137,226 @@ TEST(TestPushConstants, TestConstantsWrongSize) } } } + +// TODO: Ensure different types are considered for push constants +// TEST(TestPushConstants, TestConstantsWrongType) +// { +// { +// std::string shader(R"( +// #version 450 +// layout(push_constant) uniform PushConstants { +// float x; +// float y; +// float z; +// } pcs; +// layout (local_size_x = 1) in; +// layout(set = 0, binding = 0) buffer a { float pa[]; }; +// void main() { +// pa[0] += pcs.x; +// pa[1] += pcs.y; +// pa[2] += pcs.z; +// })"); +// +// std::vector spirv = compileSource(shader); +// +// std::shared_ptr sq = nullptr; +// +// { +// kp::Manager mgr; +// +// std::shared_ptr> tensor = +// mgr.tensor({ 0, 0, 0 }); +// +// std::shared_ptr algo = mgr.algorithm( +// { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); +// +// sq = mgr.sequence()->record({ tensor }); +// +// EXPECT_THROW(sq->record( +// algo, std::vector{ 1, 2, 3 }), +// std::runtime_error); +// } +// } +// } + +TEST(TestPushConstants, TestConstantsMixedTypes) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + float x; + uint y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y - 2147483000; + pa[2] += pcs.z; + })"); + + struct Params{float x; uint32_t y; int32_t z;}; + + std::vector spirv = compileSource(shader); + + std::shared_ptr 
sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 15.32, 2147483650, 10 }}); + sq->eval(algo, std::vector{{ 30.32, 2147483650, -3 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 45.64, 1300, 7 })); + } + } +} + +TEST(TestPushConstants, TestConstantsInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + int x; + int y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ -1, -1, -1 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval(algo, std::vector{{ -1, -1, -1 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ -3, -3, -3 })); + } + } +} + +TEST(TestPushConstants, TestConstantsUnsignedInt) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + uint x; + uint y; + uint z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { uint pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + 
std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 2147483650, 2147483650, 2147483650 }}); + sq->eval(algo, std::vector{{ 5, 5, 5 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2147483655, 2147483655, 2147483655 })); + } + } +} + +TEST(TestPushConstants, TestConstantsDouble) +{ + { + std::string shader(R"( + #version 450 + layout(push_constant) uniform PushConstants { + double x; + double y; + double z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { double pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensor = + mgr.tensorT({ 0, 0, 0 }); + + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); + + sq = mgr.sequence()->eval({ tensor }); + + // We need to run this in sequence to avoid race condition + // We can't use atomicAdd as swiftshader doesn't support it for + // float + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval(algo, std::vector{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }}); + sq->eval({ tensor }); + + EXPECT_EQ(tensor->vector(), std::vector({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 })); + } + } +} From 76fc7cd1c4a67f1cff38714b1c5fc320d11cbbef Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 11:32:19 +0100 Subject: [PATCH 05/19] Renamed Signed-off-by: Alejandro Saucedo --- 
test/TestPushConstant.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 6d32fccaf..23d24c010 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -197,7 +197,7 @@ TEST(TestPushConstants, TestConstantsMixedTypes) pa[2] += pcs.z; })"); - struct Params{float x; uint32_t y; int32_t z;}; + struct TestConsts{float x; uint32_t y; int32_t z;}; std::vector spirv = compileSource(shader); @@ -209,7 +209,7 @@ TEST(TestPushConstants, TestConstantsMixedTypes) std::shared_ptr> tensor = mgr.tensorT({ 0, 0, 0 }); - std::shared_ptr algo = mgr.algorithm( + std::shared_ptr algo = mgr.algorithm( { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }}); sq = mgr.sequence()->eval({ tensor }); @@ -217,8 +217,8 @@ TEST(TestPushConstants, TestConstantsMixedTypes) // We need to run this in sequence to avoid race condition // We can't use atomicAdd as swiftshader doesn't support it for // float - sq->eval(algo, std::vector{{ 15.32, 2147483650, 10 }}); - sq->eval(algo, std::vector{{ 30.32, 2147483650, -3 }}); + sq->eval(algo, std::vector{{ 15.32, 2147483650, 10 }}); + sq->eval(algo, std::vector{{ 30.32, 2147483650, -3 }}); sq->eval({ tensor }); EXPECT_EQ(tensor->vector(), std::vector({ 45.64, 1300, 7 })); From a30b6c53dd9fd9c2f9741c074be143a20391f954 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 11:33:57 +0100 Subject: [PATCH 06/19] Format Signed-off-by: Alejandro Saucedo --- test/TestPushConstant.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 23d24c010..c885f1d87 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -197,7 +197,11 @@ TEST(TestPushConstants, TestConstantsMixedTypes) pa[2] += pcs.z; })"); - struct TestConsts{float x; uint32_t y; int32_t z;}; + struct TestConsts{ + float x; + uint32_t y; + int32_t z; + }; std::vector spirv = 
compileSource(shader); From ac0f30191b3cd7082b64092905c8241de5ca1336 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 11:50:09 +0100 Subject: [PATCH 07/19] Updated push consts Signed-off-by: Alejandro Saucedo --- test/TestMultipleAlgoExecutions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 7f63c208f..1fe6a6664 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -220,7 +220,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } -TEST(TestAlgoUtils, TestAlgorithmUtilFunctions) +TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions) { kp::Manager mgr; @@ -273,6 +273,6 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions) pushConsts); EXPECT_EQ(algorithm->getWorkgroup(), workgroup); - EXPECT_EQ(algorithm->getPush(), pushConsts); - EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts); + EXPECT_EQ(algorithm->getPushConstants(), pushConsts); + EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts); } From 5ed26913011bcc665d75cf33b62168231e076033 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 12:11:33 +0100 Subject: [PATCH 08/19] Updated push const to validate same size params Signed-off-by: Alejandro Saucedo --- single_include/kompute/Kompute.hpp | 11 ++++++----- src/include/kompute/Algorithm.hpp | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 8cbfd404f..67efbe708 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1248,6 +1248,12 @@ class Algorithm { uint32_t memorySize = sizeof(decltype(pushConstants.back())); uint32_t size = pushConstants.size(); + + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void 
setPushConstants(void* data, uint32_t size, uint32_t memorySize) { + uint32_t totalSize = memorySize * size; uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; @@ -1262,11 +1268,6 @@ class Algorithm free(this->mPushConstantsData); } - this->setPushConstants(pushConstants.data(), size, memorySize); - } - - void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { - uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index a0b2ba146..9dd70b1b5 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -185,6 +185,12 @@ class Algorithm { uint32_t memorySize = sizeof(decltype(pushConstants.back())); uint32_t size = pushConstants.size(); + + this->setPushConstants(pushConstants.data(), size, memorySize); + } + + void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { + uint32_t totalSize = memorySize * size; uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize; @@ -199,11 +205,6 @@ class Algorithm free(this->mPushConstantsData); } - this->setPushConstants(pushConstants.data(), size, memorySize); - } - - void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { - uint32_t totalSize = size * memorySize; this->mPushConstantsData = malloc(totalSize); memcpy(this->mPushConstantsData, data, totalSize); this->mPushConstantsDataTypeMemorySize = memorySize; From 5193975e5c299d397af4dbe3f60da8f912697c73 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 12:22:42 +0100 Subject: [PATCH 09/19] Updated push const to validate same size params Signed-off-by: Alejandro Saucedo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
0f23f75ec..64ae11155 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsyst VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake" # These are the tests that don't work with swiftshader but can be run directly with vulkan -FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble" +FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants.TestConstantsDouble" ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" From 6113d286a9177b41e012f3a291651ff06a0f2607 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 13:13:20 +0100 Subject: [PATCH 10/19] Updated python to build Signed-off-by: Alejandro Saucedo --- python/src/main.cpp | 1 - test/TestSpecializationConstant.cpp | 48 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 846576adb..43c369555 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -61,7 +61,6 @@ PYBIND11_MODULE(kp, m) { py::class_>(m, "Algorithm", DOC(kp, Algorithm, Algorithm)) .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors)) .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) - .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants)) .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); py::class_>(m, "Tensor", DOC(kp, Tensor)) diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index 15da143a0..f57c221ab 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -53,3 +53,51 @@ TEST(TestSpecializationConstants, TestTwoConstants) 
} } } + +TEST(TestSpecializationConstants, TestConstantsInt) +{ + { + std::string shader(R"( + #version 450 + layout (constant_id = 0) const float cOne = 1; + layout (constant_id = 1) const float cTwo = 1; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + layout(set = 0, binding = 1) buffer b { float pb[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = cOne; + pb[index] = cTwo; + })"); + + std::vector spirv = compileSource(shader); + + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + std::shared_ptr> tensorA = + mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = + mgr.tensor({ 0, 0, 0 }); + + std::vector> params = { tensorA, + tensorB }; + + kp::Constants spec = kp::Constants({ 5.0, 0.3 }); + + std::shared_ptr algo = + mgr.algorithm(params, spirv, {}, spec); + + sq = mgr.sequence() + ->record(params) + ->record(algo) + ->record(params) + ->eval(); + + EXPECT_EQ(tensorA->vector(), std::vector({ 5, 5, 5 })); + EXPECT_EQ(tensorB->vector(), std::vector({ 0.3, 0.3, 0.3 })); + } + } +} From 1972f2c8f8a10d5656574e0d9f8123edfd3b50ea Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 13:57:54 +0100 Subject: [PATCH 11/19] Updated python build Signed-off-by: Alejandro Saucedo --- python/src/main.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 4 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 43c369555..b0ef31191 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -178,8 +178,8 @@ PYBIND11_MODULE(kp, m) { const std::vector>& tensors, const py::bytes& spirv, const kp::Workgroup& workgroup, - const kp::Constants& spec_consts, - const kp::Constants& push_consts) { + const std::vector& spec_consts, + const std::vector& push_consts) { py::buffer_info info(py::buffer(spirv).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); @@ -190,8 +190,100 @@ 
PYBIND11_MODULE(kp, m) { py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), - py::arg("spec_consts") = kp::Constants(), - py::arg("push_consts") = kp::Constants()) + py::arg("spec_consts") = std::vector(), + py::arg("push_consts") = std::vector()) + .def("algorithm_t", [np](kp::Manager& self, + const std::vector>& tensors, + const py::bytes& spirv, + const kp::Workgroup& workgroup, + const py::array& spec_consts, + const py::array& push_consts) { + + py::buffer_info info(py::buffer(spirv).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + std::vector spirvVec((uint32_t*)data, (uint32_t*)(data + length)); + + const py::buffer_info pushInfo = push_consts.request(); + const py::buffer_info specInfo = spec_consts.request(); + + KP_LOG_DEBUG("Kompute Python Manager creating Algorithm_T with " + "push consts data size {} dtype {} and spec const data size {} dtype {}", + push_consts.size(), std::string(py::str(push_consts.dtype())), + spec_consts.size(), std::string(py::str(spec_consts.dtype()))); + + // We have to iterate across a combination of parameters due to the lack of support for templating + if (spec_consts.dtype() == py::dtype::of()) { + std::vector specConstsVec((float*)specInfo.ptr, ((float*)specInfo.ptr) + specInfo.size); + if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, 
specConstsVec, pushConstsVec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec); + } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((int32_t*)specInfo.ptr, ((int32_t*)specInfo.ptr) + specInfo.size); + if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((uint32_t*)specInfo.ptr, ((uint32_t*)specInfo.ptr) + specInfo.size); + if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == 
py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector specconstsvec((double*)specInfo.ptr, ((double*)specInfo.ptr) + specInfo.size); + if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } else if (spec_consts.dtype() == py::dtype::of()) { + std::vector pushconstsvec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size); + return self.algorithm(tensors, spirvVec, workgroup, specconstsvec, pushconstsvec); + } + } else { + // If reach then no valid dtype supported + throw std::runtime_error("Kompute Python no valid dtype supported"); + } + }, + DOC(kp, Manager, algorithm), + py::arg("tensors"), + py::arg("spirv"), + py::arg("workgroup") = kp::Workgroup(), + py::arg("spec_consts") = std::vector(), + py::arg("push_consts") = std::vector()) .def("list_devices", [](kp::Manager& self){ const std::vector devices = self.listDevices(); py::list list; From 
3d320ff687a29b0165a431287958c06a9bc8a641 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 14:07:20 +0100 Subject: [PATCH 12/19] Updating python Signed-off-by: Alejandro Saucedo --- single_include/kompute/Kompute.hpp | 520 ----------------------------- src/OpAlgoDispatch.cpp | 1 + 2 files changed, 1 insertion(+), 520 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 67efbe708..b202ab580 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1793,523 +1793,3 @@ class OpMult : public OpAlgoDispatch }; } // End namespace kp - -// SPDX-License-Identifier: Apache-2.0 - -namespace kp { - -/** - * Container of operations that can be sent to GPU as batch - */ -class Sequence : public std::enable_shared_from_this -{ - public: - /** - * Main constructor for sequence which requires core vulkan components to - * generate all dependent resources. - * - * @param physicalDevice Vulkan physical device - * @param device Vulkan logical device - * @param computeQueue Vulkan compute queue - * @param queueIndex Vulkan compute queue index in device - * @param totalTimestamps Maximum number of timestamps to allocate - */ - Sequence(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr computeQueue, - uint32_t queueIndex, - uint32_t totalTimestamps = 0); - /** - * Destructor for sequence which is responsible for cleaning all subsequent - * owned operations. - */ - ~Sequence(); - - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param op Object derived from kp::BaseOp that will be recoreded by the - * sequence which will be used when the operation is evaluated. 
- * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr record(std::shared_ptr op); - - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr record( - std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->record(op); - } - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr record(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->record(op); - } - - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job synchronously (with a barrier). - * - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr eval(); - - /** - * Resets all the recorded and stored operations, records the operation - * provided and submits into the gpu as a submit job synchronously (with a - * barrier). 
- * - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr eval(std::shared_ptr op); - - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr eval(std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->eval(op); - } - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr eval(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->eval(op); - } - - /** - * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() - * must ALWAYS be called after to ensure the sequence is terminated - * correctly. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAsync(); - /** - * Clears currnet operations to record provided one in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() - * must ALWAYS be called after to ensure the sequence is terminated - * correctly. - * - * @return Boolean stating whether execution was successful. 
- */ - std::shared_ptr evalAsync(std::shared_ptr op); - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr evalAsync( - std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->evalAsync(op); - } - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr evalAsync(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->evalAsync(op); - } - - /** - * Eval Await waits for the fence to finish processing and then once it - * finishes, it runs the postEval of all operations. - * - * @param waitFor Number of milliseconds to wait before timing out. - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); - - /** - * Clear function clears all operations currently recorded and starts - * recording again. - */ - void clear(); - - /** - * Return the timestamps that were latched at the beginning and - * after each operation during the last eval() call. - */ - std::vector getTimestamps(); - - /** - * Begins recording commands for commands to be submitted into the command - * buffer. 
- * - * @return Boolean stating whether execution was successful. - */ - void begin(); - - /** - * Ends the recording and stops recording commands when the record command - * is sent. - * - * @return Boolean stating whether execution was successful. - */ - void end(); - - /** - * Returns true if the sequence is currently in recording activated. - * - * @return Boolean stating if recording ongoing. - */ - bool isRecording(); - - /** - * Returns true if the sequence has been initialised, and it's based on the - * GPU resources being refrenced. - * - * @return Boolean stating if is initialized - */ - bool isInit(); - - /** - * Clears command buffer and triggers re-record of all the current - * operations saved, which is useful if the underlying kp::Tensors or - * kp::Algorithms are modified and need to be re-recorded. - */ - void rerecord(); - - /** - * Returns true if the sequence is currently running - mostly used for async - * workloads. - * - * @return Boolean stating if currently running. - */ - bool isRunning(); - - /** - * Destroys and frees the GPU resources which include the buffer and memory - * and sets the sequence as init=False. 
- */ - void destroy(); - - private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mPhysicalDevice = nullptr; - std::shared_ptr mDevice = nullptr; - std::shared_ptr mComputeQueue = nullptr; - uint32_t mQueueIndex = -1; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mCommandPool = nullptr; - bool mFreeCommandPool = false; - std::shared_ptr mCommandBuffer = nullptr; - bool mFreeCommandBuffer = false; - - // -------------- ALWAYS OWNED RESOURCES - vk::Fence mFence; - std::vector> mOperations; - std::shared_ptr timestampQueryPool = nullptr; - - // State - bool mRecording = false; - bool mIsRunning = false; - - // Create functions - void createCommandPool(); - void createCommandBuffer(); - void createTimestampQueryPool(uint32_t totalTimestamps); -}; - -} // End namespace kp - -// SPDX-License-Identifier: Apache-2.0 - -#include -#include - -#define KP_DEFAULT_SESSION "DEFAULT" - -namespace kp { - -/** - Base orchestrator which creates and manages device and child components -*/ -class Manager -{ - public: - /** - Base constructor and default used which creates the base resources - including choosing the device 0 by default. - */ - Manager(); - - /** - * Similar to base constructor but allows for further configuration to use - * when creating the Vulkan resources. - * - * @param physicalDeviceIndex The index of the physical device to use - * @param familyQueueIndices (Optional) List of queue indices to add for - * explicit allocation - * @param desiredExtensions The desired extensions to load from - * physicalDevice - */ - Manager(uint32_t physicalDeviceIndex, - const std::vector& familyQueueIndices = {}, - const std::vector& desiredExtensions = {}); - - /** - * Manager constructor which allows your own vulkan application to integrate - * with the kompute use. 
- * - * @param instance Vulkan compute instance to base this application - * @param physicalDevice Vulkan physical device to use for application - * @param device Vulkan logical device to use for all base resources - * @param physicalDeviceIndex Index for vulkan physical device used - */ - Manager(std::shared_ptr instance, - std::shared_ptr physicalDevice, - std::shared_ptr device); - - /** - * Manager destructor which would ensure all owned resources are destroyed - * unless explicitly stated that resources should not be destroyed or freed. - */ - ~Manager(); - - /** - * Create a managed sequence that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. - * - * @param queueIndex The queue to use from the available queues - * @param nrOfTimestamps The maximum number of timestamps to allocate. - * If zero (default), disables latching of timestamps. - * @returns Shared pointer with initialised sequence - */ - std::shared_ptr sequence(uint32_t queueIndex = 0, - uint32_t totalTimestamps = 0); - - /** - * Create a managed tensor that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. 
- * - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize - * @returns Shared pointer with initialised tensor - */ - template - std::shared_ptr> tensorT( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - - std::shared_ptr> tensor{ new kp::TensorT( - this->mPhysicalDevice, this->mDevice, data, tensorType) }; - - if (this->mManageResources) { - this->mManagedTensors.push_back(tensor); - } - - return tensor; - } - - std::shared_ptr> tensor( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - return this->tensorT(data, tensorType); - } - - std::shared_ptr tensor( - void* data, - uint32_t elementTotalCount, - uint32_t elementMemorySize, - const Tensor::TensorDataTypes& dataType, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, - this->mDevice, - data, - elementTotalCount, - elementMemorySize, - dataType, - tensorType) }; - - if (this->mManageResources) { - this->mManagedTensors.push_back(tensor); - } - - return tensor; - } - - std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const std::vector& specializationConstants = {}, - const std::vector& pushConstants = {}) - { - return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); - } - - /** - * Create a managed algorithm that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. 
- * - * @param tensors (optional) The tensors to initialise the algorithm with - * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch - * @param workgroup (optional) kp::Workgroup for algorithm to use, and - * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for - * specialization constants, and defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, - * and defaults to an empty constant - * @returns Shared pointer with initialised algorithm - */ - template - std::shared_ptr algorithm( - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const std::vector& specializationConstants, - const std::vector

& pushConstants) - { - - KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - - std::shared_ptr algorithm{ new kp::Algorithm( - this->mDevice, - tensors, - spirv, - workgroup, - specializationConstants, - pushConstants) }; - - if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); - } - - return algorithm; - } - - /** - * Destroy the GPU resources and all managed resources by manager. - **/ - void destroy(); - /** - * Run a pseudo-garbage collection to release all the managed resources - * that have been already freed due to these reaching to zero ref count. - **/ - void clear(); - - /** - * Information about the current device. - * - * @return vk::PhysicalDeviceProperties containing information about the device - **/ - vk::PhysicalDeviceProperties getDeviceProperties() const; - - /** - * List the devices available in the current vulkan instance. - * - * @return vector of physical devices containing their respective properties - **/ - std::vector listDevices() const; - - private: - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mInstance = nullptr; - bool mFreeInstance = false; - std::shared_ptr mPhysicalDevice = nullptr; - std::shared_ptr mDevice = nullptr; - bool mFreeDevice = false; - - // -------------- ALWAYS OWNED RESOURCES - std::vector> mManagedTensors; - std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; - - std::vector mComputeQueueFamilyIndices; - std::vector> mComputeQueues; - - bool mManageResources = false; - -#if DEBUG -#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS - vk::DebugReportCallbackEXT mDebugReportCallback; - vk::DispatchLoaderDynamic mDebugDispatcher; -#endif -#endif - - // Create functions - void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}, - uint32_t hysicalDeviceIndex = 0, - const std::vector& desiredExtensions = {}); -}; - -} // End namespace kp diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index c6099ff85..88d6e55fb 100644 
--- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -10,6 +10,7 @@ OpAlgoDispatch::~OpAlgoDispatch() KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); if (this->mPushConstantsData) { + KP_LOG_DEBUG("Kompute freeing push constants data"); free(this->mPushConstantsData); } } From 559b83e07fafa13bb672b760857c4f56a17e9873 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 14:11:57 +0100 Subject: [PATCH 13/19] Fixing deleted sequence from header Signed-off-by: Alejandro Saucedo --- single_include/kompute/Kompute.hpp | 520 +++++++++++++++++++++++++++++ 1 file changed, 520 insertions(+) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index b202ab580..67efbe708 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1793,3 +1793,523 @@ class OpMult : public OpAlgoDispatch }; } // End namespace kp + +// SPDX-License-Identifier: Apache-2.0 + +namespace kp { + +/** + * Container of operations that can be sent to GPU as batch + */ +class Sequence : public std::enable_shared_from_this +{ + public: + /** + * Main constructor for sequence which requires core vulkan components to + * generate all dependent resources. + * + * @param physicalDevice Vulkan physical device + * @param device Vulkan logical device + * @param computeQueue Vulkan compute queue + * @param queueIndex Vulkan compute queue index in device + * @param totalTimestamps Maximum number of timestamps to allocate + */ + Sequence(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr computeQueue, + uint32_t queueIndex, + uint32_t totalTimestamps = 0); + /** + * Destructor for sequence which is responsible for cleaning all subsequent + * owned operations. + */ + ~Sequence(); + + /** + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. 
This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param op Object derived from kp::BaseOp that will be recoreded by the + * sequence which will be used when the operation is evaluated. + * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr record(std::shared_ptr op); + + /** + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr record( + std::vector> tensors, + TArgs&&... params) + { + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + return this->record(op); + } + /** + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr record(std::shared_ptr algorithm, + TArgs&&... params) + { + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; + return this->record(op); + } + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job synchronously (with a barrier). 
+ * + * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr eval(); + + /** + * Resets all the recorded and stored operations, records the operation + * provided and submits into the gpu as a submit job synchronously (with a + * barrier). + * + * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr eval(std::shared_ptr op); + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr eval(std::vector> tensors, + TArgs&&... params) + { + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + return this->eval(op); + } + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr eval(std::shared_ptr algorithm, + TArgs&&... params) + { + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; + return this->eval(op); + } + + /** + * Eval Async sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job without a barrier. EvalAwait() + * must ALWAYS be called after to ensure the sequence is terminated + * correctly. + * + * @return Boolean stating whether execution was successful. 
+ */ + std::shared_ptr evalAsync(); + /** + * Clears currnet operations to record provided one in the vector of + * operations into the gpu as a submit job without a barrier. EvalAwait() + * must ALWAYS be called after to ensure the sequence is terminated + * correctly. + * + * @return Boolean stating whether execution was successful. + */ + std::shared_ptr evalAsync(std::shared_ptr op); + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr evalAsync( + std::vector> tensors, + TArgs&&... params) + { + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + return this->evalAsync(op); + } + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr evalAsync(std::shared_ptr algorithm, + TArgs&&... params) + { + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; + return this->evalAsync(op); + } + + /** + * Eval Await waits for the fence to finish processing and then once it + * finishes, it runs the postEval of all operations. + * + * @param waitFor Number of milliseconds to wait before timing out. 
+ * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); + + /** + * Clear function clears all operations currently recorded and starts + * recording again. + */ + void clear(); + + /** + * Return the timestamps that were latched at the beginning and + * after each operation during the last eval() call. + */ + std::vector getTimestamps(); + + /** + * Begins recording commands for commands to be submitted into the command + * buffer. + * + * @return Boolean stating whether execution was successful. + */ + void begin(); + + /** + * Ends the recording and stops recording commands when the record command + * is sent. + * + * @return Boolean stating whether execution was successful. + */ + void end(); + + /** + * Returns true if the sequence is currently in recording activated. + * + * @return Boolean stating if recording ongoing. + */ + bool isRecording(); + + /** + * Returns true if the sequence has been initialised, and it's based on the + * GPU resources being refrenced. + * + * @return Boolean stating if is initialized + */ + bool isInit(); + + /** + * Clears command buffer and triggers re-record of all the current + * operations saved, which is useful if the underlying kp::Tensors or + * kp::Algorithms are modified and need to be re-recorded. + */ + void rerecord(); + + /** + * Returns true if the sequence is currently running - mostly used for async + * workloads. + * + * @return Boolean stating if currently running. + */ + bool isRunning(); + + /** + * Destroys and frees the GPU resources which include the buffer and memory + * and sets the sequence as init=False. 
+ */ + void destroy(); + + private: + // -------------- NEVER OWNED RESOURCES + std::shared_ptr mPhysicalDevice = nullptr; + std::shared_ptr mDevice = nullptr; + std::shared_ptr mComputeQueue = nullptr; + uint32_t mQueueIndex = -1; + + // -------------- OPTIONALLY OWNED RESOURCES + std::shared_ptr mCommandPool = nullptr; + bool mFreeCommandPool = false; + std::shared_ptr mCommandBuffer = nullptr; + bool mFreeCommandBuffer = false; + + // -------------- ALWAYS OWNED RESOURCES + vk::Fence mFence; + std::vector> mOperations; + std::shared_ptr timestampQueryPool = nullptr; + + // State + bool mRecording = false; + bool mIsRunning = false; + + // Create functions + void createCommandPool(); + void createCommandBuffer(); + void createTimestampQueryPool(uint32_t totalTimestamps); +}; + +} // End namespace kp + +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#define KP_DEFAULT_SESSION "DEFAULT" + +namespace kp { + +/** + Base orchestrator which creates and manages device and child components +*/ +class Manager +{ + public: + /** + Base constructor and default used which creates the base resources + including choosing the device 0 by default. + */ + Manager(); + + /** + * Similar to base constructor but allows for further configuration to use + * when creating the Vulkan resources. + * + * @param physicalDeviceIndex The index of the physical device to use + * @param familyQueueIndices (Optional) List of queue indices to add for + * explicit allocation + * @param desiredExtensions The desired extensions to load from + * physicalDevice + */ + Manager(uint32_t physicalDeviceIndex, + const std::vector& familyQueueIndices = {}, + const std::vector& desiredExtensions = {}); + + /** + * Manager constructor which allows your own vulkan application to integrate + * with the kompute use. 
+ * + * @param instance Vulkan compute instance to base this application + * @param physicalDevice Vulkan physical device to use for application + * @param device Vulkan logical device to use for all base resources + * @param physicalDeviceIndex Index for vulkan physical device used + */ + Manager(std::shared_ptr instance, + std::shared_ptr physicalDevice, + std::shared_ptr device); + + /** + * Manager destructor which would ensure all owned resources are destroyed + * unless explicitly stated that resources should not be destroyed or freed. + */ + ~Manager(); + + /** + * Create a managed sequence that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. + * + * @param queueIndex The queue to use from the available queues + * @param nrOfTimestamps The maximum number of timestamps to allocate. + * If zero (default), disables latching of timestamps. + * @returns Shared pointer with initialised sequence + */ + std::shared_ptr sequence(uint32_t queueIndex = 0, + uint32_t totalTimestamps = 0); + + /** + * Create a managed tensor that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. 
+ * + * @param data The data to initialize the tensor with + * @param tensorType The type of tensor to initialize + * @returns Shared pointer with initialised tensor + */ + template + std::shared_ptr> tensorT( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); + + std::shared_ptr> tensor{ new kp::TensorT( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + + std::shared_ptr> tensor( + const std::vector& data, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + return this->tensorT(data, tensorType); + } + + std::shared_ptr tensor( + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const Tensor::TensorDataTypes& dataType, + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + { + std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, + this->mDevice, + data, + elementTotalCount, + elementMemorySize, + dataType, + tensorType) }; + + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } + + return tensor; + } + + std::shared_ptr algorithm( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const std::vector& specializationConstants = {}, + const std::vector& pushConstants = {}) + { + return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + + /** + * Create a managed algorithm that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. 
+ * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) kp::Constant to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) kp::Constant to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ + template + std::shared_ptr algorithm( + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const std::vector& specializationConstants, + const std::vector

& pushConstants) + { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; + + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } + + return algorithm; + } + + /** + * Destroy the GPU resources and all managed resources by manager. + **/ + void destroy(); + /** + * Run a pseudo-garbage collection to release all the managed resources + * that have been already freed due to these reaching to zero ref count. + **/ + void clear(); + + /** + * Information about the current device. + * + * @return vk::PhysicalDeviceProperties containing information about the device + **/ + vk::PhysicalDeviceProperties getDeviceProperties() const; + + /** + * List the devices available in the current vulkan instance. + * + * @return vector of physical devices containing their respective properties + **/ + std::vector listDevices() const; + + private: + // -------------- OPTIONALLY OWNED RESOURCES + std::shared_ptr mInstance = nullptr; + bool mFreeInstance = false; + std::shared_ptr mPhysicalDevice = nullptr; + std::shared_ptr mDevice = nullptr; + bool mFreeDevice = false; + + // -------------- ALWAYS OWNED RESOURCES + std::vector> mManagedTensors; + std::vector> mManagedSequences; + std::vector> mManagedAlgorithms; + + std::vector mComputeQueueFamilyIndices; + std::vector> mComputeQueues; + + bool mManageResources = false; + +#if DEBUG +#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS + vk::DebugReportCallbackEXT mDebugReportCallback; + vk::DispatchLoaderDynamic mDebugDispatcher; +#endif +#endif + + // Create functions + void createInstance(); + void createDevice(const std::vector& familyQueueIndices = {}, + uint32_t hysicalDeviceIndex = 0, + const std::vector& desiredExtensions = {}); +}; + +} // End namespace kp From b9e40d50285ac3192b26e1ed6859c6241b7eb09e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo 
Date: Sun, 12 Sep 2021 14:21:33 +0100 Subject: [PATCH 14/19] Updated algo to align with memory management Signed-off-by: Alejandro Saucedo --- src/Algorithm.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 9179cffbd..6caccf9bb 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -23,13 +23,13 @@ Algorithm::isInit() void Algorithm::destroy() { - if (this->mPushConstantsData) { - free(this->mPushConstantsData); - } - - if (this->mSpecializationConstantsData) { - free(this->mSpecializationConstantsData); - } + // We don't have to free memory on destroy as it's freed by the commandBuffer destructor + // if (this->mPushConstantsData) { + // free(this->mPushConstantsData); + // } + // if (this->mSpecializationConstantsData) { + // free(this->mSpecializationConstantsData); + // } if (!this->mDevice) { KP_LOG_WARN("Kompute Algorithm destroy function reached with null " From 2d4c2f733371afb4875b712cbd3f8e04ef33da05 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 15:00:07 +0100 Subject: [PATCH 15/19] Fully functional python Signed-off-by: Alejandro Saucedo --- python/src/main.cpp | 30 ++++++++++++++++++++++++++- python/test/test_kompute.py | 41 ++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index b0ef31191..82a4bff5e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -14,6 +14,31 @@ namespace py = pybind11; //used in Core.hpp py::object kp_debug, kp_info, kp_warning, kp_error; +std::unique_ptr opAlgoDispatchPyInit( + std::shared_ptr& algorithm, + const py::array& push_consts) { + const py::buffer_info info = push_consts.request(); + KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with push_consts size {} dtype {}", + push_consts.size(), std::string(py::str(push_consts.dtype()))); + + + if (push_consts.dtype() == py::dtype::of()) { + std::vector 
dataVec((float*)info.ptr, ((float*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((uint32_t*)info.ptr, ((uint32_t*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((int32_t*)info.ptr, ((int32_t*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else if (push_consts.dtype() == py::dtype::of()) { + std::vector dataVec((double*)info.ptr, ((double*)info.ptr) + info.size); + return std::unique_ptr{new kp::OpAlgoDispatch(algorithm, dataVec)}; + } else { + throw std::runtime_error("Kompute Python no valid dtype supported"); + } +} + PYBIND11_MODULE(kp, m) { // The logging modules are used in the Kompute.hpp file @@ -51,7 +76,10 @@ PYBIND11_MODULE(kp, m) { m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) .def(py::init&,const kp::Constants&>(), DOC(kp, OpAlgoDispatch, OpAlgoDispatch), - py::arg("algorithm"), py::arg("push_consts") = kp::Constants()); + py::arg("algorithm"), py::arg("push_consts") = kp::Constants()) + .def(py::init(&opAlgoDispatchPyInit), + DOC(kp, OpAlgoDispatch, OpAlgoDispatch), + py::arg("algorithm"), py::arg("push_consts")); py::class_>( m, "OpMult", py::base(), DOC(kp, OpMult)) diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index eaf6b28db..385933f26 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -197,10 +197,49 @@ def test_pushconsts(): .record(kp.OpTensorSyncDevice([tensor])) .record(kp.OpAlgoDispatch(algo)) .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1])) + .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1])) .record(kp.OpTensorSyncLocal([tensor])) .eval()) - assert np.all(tensor.data() == np.array([0.4, 0.4, 0.4], dtype=np.float32)) + assert np.allclose(tensor.data(), np.array([0.7, 0.6, 0.5], 
dtype=np.float32)) + + +def test_pushconsts_int(): + + spirv = compile_source(""" + #version 450 + layout(push_constant) uniform PushConstants { + int x; + int y; + int z; + } pcs; + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + void main() { + pa[0] += pcs.x; + pa[1] += pcs.y; + pa[2] += pcs.z; + } + """) + + mgr = kp.Manager() + + tensor = mgr.tensor_t(np.array([0, 0, 0], dtype=np.int32)) + + spec_consts = np.array([], dtype=np.int32) + push_consts = np.array([-1, -1, -1], dtype=np.int32) + + algo = mgr.algorithm_t([tensor], spirv, (1, 1, 1), spec_consts, push_consts) + + (mgr.sequence() + .record(kp.OpTensorSyncDevice([tensor])) + .record(kp.OpAlgoDispatch(algo)) + .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32))) + .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32))) + .record(kp.OpTensorSyncLocal([tensor])) + .eval()) + + assert np.all(tensor.data() == np.array([-3, -3, -3], dtype=np.int32)) def test_workgroup(): From 1b2f42c3aca71bbfdf2503c228f348fe2cdb01db Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 15:18:11 +0100 Subject: [PATCH 16/19] Added specconst test for int Signed-off-by: Alejandro Saucedo --- test/TestSpecializationConstant.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index f57c221ab..a1dd5587d 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -59,11 +59,11 @@ TEST(TestSpecializationConstants, TestConstantsInt) { std::string shader(R"( #version 450 - layout (constant_id = 0) const float cOne = 1; - layout (constant_id = 1) const float cTwo = 1; + layout (constant_id = 0) const int cOne = 1; + layout (constant_id = 1) const int cTwo = 1; layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - layout(set = 0, binding = 1) buffer b { float 
pb[]; }; + layout(set = 0, binding = 0) buffer a { int pa[]; }; + layout(set = 0, binding = 1) buffer b { int pb[]; }; void main() { uint index = gl_GlobalInvocationID.x; pa[index] = cOne; @@ -77,18 +77,18 @@ TEST(TestSpecializationConstants, TestConstantsInt) { kp::Manager mgr; - std::shared_ptr> tensorA = - mgr.tensor({ 0, 0, 0 }); - std::shared_ptr> tensorB = - mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = + mgr.tensorT({ 0, 0, 0 }); + std::shared_ptr> tensorB = + mgr.tensorT({ 0, 0, 0 }); std::vector> params = { tensorA, tensorB }; - kp::Constants spec = kp::Constants({ 5.0, 0.3 }); + std::vector spec({ -1, -2 }); std::shared_ptr algo = - mgr.algorithm(params, spirv, {}, spec); + mgr.algorithm(params, spirv, {}, spec, {}); sq = mgr.sequence() ->record(params) @@ -96,8 +96,9 @@ TEST(TestSpecializationConstants, TestConstantsInt) ->record(params) ->eval(); - EXPECT_EQ(tensorA->vector(), std::vector({ 5, 5, 5 })); - EXPECT_EQ(tensorB->vector(), std::vector({ 0.3, 0.3, 0.3 })); + EXPECT_EQ(tensorA->vector(), std::vector({ -1, -1, -1 })); + EXPECT_EQ(tensorB->vector(), std::vector({ -2, -2, -2 })); } } } + From 990ccd5f3b8ada4e7744e14ac1658b1f6f8124d5 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 16:06:49 +0100 Subject: [PATCH 17/19] Added docstrings for new functions Signed-off-by: Alejandro Saucedo --- src/include/kompute/Algorithm.hpp | 18 +++++++++++++----- src/include/kompute/Manager.hpp | 18 ++++++++++++++++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 9dd70b1b5..cd9d913f0 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -24,12 +24,12 @@ class Algorithm * @param spirv (optional) The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. 
- * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The templatable param is to be used to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) This templatable param is to be used when * initializing the pipeline, which set the size of the push constants - - * these can be modified but all new values must have the same vector size - * as this initial value. + * these can be modified but all new values must have the same data type and length + * as otherwise it will result in errors. */ template Algorithm(std::shared_ptr device, @@ -176,7 +176,7 @@ class Algorithm * Sets the push constants to the new value provided to use in the next * bindPush() * - * @param The kp::Constant to use to set the push constants to use in the + * @param pushConstants The templatable vector is to be used to set the push constants to use in the * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. */ @@ -189,6 +189,14 @@ class Algorithm this->setPushConstants(pushConstants.data(), size, memorySize); } + /** + * Sets the push constants to the new value provided to use in the next + * bindPush() with the raw memory block location and memory size to be used. + * + * @param data The raw data point to copy the data from, without modifying the pointer. + * @param size The number of data elements provided in the data + * @param memorySize The memory size of each of the data elements in bytes. 
+ */ void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = memorySize * size; diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 8a4244a2b..d9b850efd 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -123,6 +123,20 @@ class Manager return tensor; } + /** + * Default non-template function that can be used to create algorithm objects + * which provides default types to the push and spec constants as floats. + * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) float vector to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) float vector to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ std::shared_ptr algorithm( const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -141,9 +155,9 @@ class Manager * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for + * @param specializationConstants (optional) templatable vector parameter to use for * specialization constants, and defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, + * @param pushConstants (optional) templatable vector parameter to use for push constants, * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ From 932620091cb178477ab63db8fd0df2487ebcff38 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 
16:51:43 +0100 Subject: [PATCH 18/19] Updated docs and renamig kp::Constants Signed-off-by: Alejandro Saucedo --- README.md | 6 +- docs/index.rst | 5 +- docs/overview/advanced-examples.rst | 6 +- docs/overview/variable-types.rst | 92 +++++++++++++++++++ .../app/src/main/cpp/KomputeModelML.cpp | 2 +- examples/logistic_regression/src/Main.cpp | 2 +- python/src/docstrings.hpp | 12 +-- python/src/main.cpp | 4 +- single_include/kompute/Kompute.hpp | 44 ++++++--- src/include/kompute/Algorithm.hpp | 8 +- test/TestLogisticRegression.cpp | 4 +- test/TestMultipleAlgoExecutions.cpp | 10 +- test/TestPushConstant.cpp | 12 +-- test/TestSpecializationConstant.cpp | 2 +- 14 files changed, 162 insertions(+), 47 deletions(-) create mode 100644 docs/overview/variable-types.rst diff --git a/README.md b/README.md index 71711973b..88ddbd36e 100644 --- a/README.md +++ b/README.md @@ -89,9 +89,9 @@ void kompute(const std::string& shader) { // 3. Create algorithm based on shader (supports buffers & push/spec constants) kp::Workgroup workgroup({3, 1, 1}); - kp::Constants specConsts({ 2 }); - kp::Constants pushConstsA({ 2.0 }); - kp::Constants pushConstsB({ 3.0 }); + std::vector specConsts({ 2 }); + std::vector pushConstsA({ 2.0 }); + std::vector pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm(params, // See documentation shader section for compileSource diff --git a/docs/index.rst b/docs/index.rst index 9a62e7814..f3358a00f 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,9 +38,10 @@ Documentation Index (as per sidebar) .. 
toctree:: :titlesonly: - :caption: Concepts & Deep Dives: + :caption: Advanced Concepts & Deep Dives: - CI, Docker Images Docs & Tests + CI, Docker Images Docs & Tests + Variable Types for Tensors, and Push/Spec Constants Asynchronous & Parallel Operations Mobile App Integration (Android) Game Engine Integration (Godot Engine) diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 68b901f76..ee44c5821 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -71,13 +71,13 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte sq = mgr.sequence() ->record({ tensor }) ->record(algo, - kp::Constants{ 0.1, 0.2, 0.3 }) + std::vector{ 0.1, 0.2, 0.3 }) ->record(algo, - kp::Constants{ 0.3, 0.2, 0.1 }) + std::vector{ 0.3, 0.2, 0.1 }) ->record({ tensor }) ->eval(); - EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->data(), std::vector({ 0.4, 0.4, 0.4 })); } } diff --git a/docs/overview/variable-types.rst b/docs/overview/variable-types.rst new file mode 100644 index 000000000..a4c6481a7 --- /dev/null +++ b/docs/overview/variable-types.rst @@ -0,0 +1,92 @@ + +Variable Types for Tensors and Constants +======================================== + +By default, the initial interfaces you may interact with will primarily use float values, which is enough to get through the basic conceptual examples. However, as real-world applications are being developed, more specialized types may be required for kp::Tensor, as well as for SpecializationConstants and PushConstants. + +Before diving into the practical classes and interfaces that can be used to take advantage of the variable type support of Kompute, we want to provide some high-level intuition on what each of these components is. + +Variable Tensor Types +--------------------- + +For the kp::Tensor class, Kompute provides under the hood an interface to have more seamless interaction with multiple different underlying data types. 
This is done through the introduction of the class kp::TensorT and parent class kp::Tensor; however, as a developer you will be primarily interacting with the top level kp::Tensor class, as this is what is provided through the high level kp::Manager class. + +The kp::Tensor class does provide an "integrated" experience, which allows users to "seamlessly" retrieve the underlying data through the `data()` and `vector()` functions. This is done by leveraging C++ templates, as well as limiting the types that can be used, which are namely: + +* float +* uint32 +* int32 +* double +* bool + +Any other data type provided would result in an error, and for the time being Kompute will focus primarily on providing support for these types. + +The tests under `TestTensor.cpp` and `test_tensor_types.py` provide an overview of how users can take advantage of these features using std::vector for C++ and numpy array for Python. + +C++ Tensor Types Usage +^^^^^^^^^^^^^^^^^^^^^^ + +Below you can see how it is possible to define different types in C++. + +.. literalinclude:: ../../test/TestTensor.cpp + :language: cpp + :lines: 21- + +Python Tensor Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: ../../python/test/test_tensor_types.py + :language: python + :lines: 26-46 + +Variable Push Constants +----------------------- + +Push constants are a relatively non-expensive way to provide dynamic data to a GPU Algorithm (shader) as further CPU compute is performed. Although push constants are a more efficient way to provide data, they are also limited, as there is a memory limit for push constants. + +Push constants with Kompute are flexible as it is possible to pass user-defined structs in C++. In Python it is limited to providing numpy arrays with multiple elements of the same type. 
+ +C++ Push Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As mentioned above, the test in `TestPushConstant.cpp` shows how it is possible to use user-defined structs for multiple elements of different types, which is not possible for specialization constants or tensors. + +These are defined in the `algorithm` function of the `kp::Manager`, and once a push constant is set, all other push constants provided have to consist of the same types and element size. + +More specifically, when passing a custom struct it is possible to pass a single element, or alternatively to pass multiple scalar values as part of the vector, and access them as outlined in the rest of the tests. + +.. literalinclude:: ../../test/TestPushConstant.cpp + :language: cpp + :lines: 182-231 + + +Python Push Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In Python the push constants are limited to a single list of elements of the same type. These are provided by passing a numpy array to the `algorithm` function or the `kp::OpAlgoDispatch` operation. + +.. literalinclude:: ../../python/test/test_tensor_types.py + :language: python + :lines: 207-242 + +Variable Specialization Constants +--------------------------------- + +Specialization constants are analogous to push constants, but these are not dynamic, can only be set on initialization or rebuild of `kp::Algorithm` and cannot be changed unless a `rebuild` is carried out. + +The usage of specialization constants is very similar to that of push constants; the only limitations are: + +* These are defined using the constant_id in the glsl shader +* Spec constants do not support complex types (i.e. user-defined structs) +* Kompute supports an array of elements of the same type for specialization constants + +C++ Spec Consts Types Usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The specialization constant example shows how they can be defined as a std::vector. + +.. 
literalinclude:: ../../test/TestSpecializationConstant.cpp + :language: cpp + :lines: 57- + + diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index 647cd5236..ef604909b 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -47,7 +47,7 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algo = - mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); mgr.sequence()->eval(params); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index c7cc827ba..ec52439ed 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -41,7 +41,7 @@ int main() + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algo = mgr.algorithm( - params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); mgr.sequence()->eval(params); diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 8f2a7fe6b..fb1def637 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -36,9 +36,9 @@ tensors (optional) The tensors to use to create the descriptor resources @param spirv (optional) The spirv code to use to create the algorithm @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if -not set. @param specializationConstants (optional) The kp::Constants +not set. @param specializationConstants (optional) The std::vector to use to initialize the specialization constants which cannot be -changed once set. 
@param pushConstants (optional) The kp::Constants to +changed once set. @param pushConstants (optional) The std::vector to use when initializing the pipeline, which set the size of the push constants - these can be modified but all new values must have the same vector size as this initial value.)doc"; @@ -54,12 +54,12 @@ static const char *__doc_kp_Algorithm_destroy = R"doc()doc"; static const char *__doc_kp_Algorithm_getPush = R"doc(Gets the specialization constants of the current algorithm. -@returns The kp::Constants currently set for push constants)doc"; +@returns The std::vector currently set for push constants)doc"; static const char *__doc_kp_Algorithm_getSpecializationConstants = R"doc(Gets the specialization constants of the current algorithm. -@returns The kp::Constants currently set for specialization constants)doc"; +@returns The std::vector currently set for specialization constants)doc"; static const char *__doc_kp_Algorithm_getTensors = R"doc(Gets the current tensors that are used in the algorithm. @@ -127,9 +127,9 @@ parameters to create the underlying resources. @param spirv The spirv code to use to create the algorithm @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. @param -specializationConstants (optional) The kp::Constants to use to +specializationConstants (optional) The std::vector to use to initialize the specialization constants which cannot be changed once -set. @param pushConstants (optional) The kp::Constants to use when +set. 
@param pushConstants (optional) The std::vector to use when initializing the pipeline, which set the size of the push constants - these can be modified but all new values must have the same vector size as this initial value.)doc"; diff --git a/python/src/main.cpp b/python/src/main.cpp index 82a4bff5e..f2bb13090 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -74,9 +74,9 @@ PYBIND11_MODULE(kp, m) { py::class_>( m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) - .def(py::init&,const kp::Constants&>(), + .def(py::init&,const std::vector&>(), DOC(kp, OpAlgoDispatch, OpAlgoDispatch), - py::arg("algorithm"), py::arg("push_consts") = kp::Constants()) + py::arg("algorithm"), py::arg("push_consts") = std::vector()) .def(py::init(&opAlgoDispatchPyInit), DOC(kp, OpAlgoDispatch, OpAlgoDispatch), py::arg("algorithm"), py::arg("push_consts")); diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 67efbe708..012eae394 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1087,12 +1087,12 @@ class Algorithm * @param spirv (optional) The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The templatable param is to be used to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) This templatable param is to be used when * initializing the pipeline, which set the size of the push constants - - * these can be modified but all new values must have the same vector size - * as this initial value. 
+ * these can be modified but all new values must have the same data type and length + * as otherwise it will result in errors. */ template Algorithm(std::shared_ptr device, @@ -1127,9 +1127,9 @@ class Algorithm * @param spirv The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The std::vector to use to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) The std::vector to use when * initializing the pipeline, which set the size of the push constants - * these can be modified but all new values must have the same vector size * as this initial value. @@ -1239,7 +1239,7 @@ class Algorithm * Sets the push constants to the new value provided to use in the next * bindPush() * - * @param The kp::Constant to use to set the push constants to use in the + * @param pushConstants The templatable vector is to be used to set the push constants to use in the * next bindPush(...) calls. The constants provided must be of the same size * as the ones created during initialization. */ @@ -1252,6 +1252,14 @@ class Algorithm this->setPushConstants(pushConstants.data(), size, memorySize); } + /** + * Sets the push constants to the new value provided to use in the next + * bindPush() with the raw memory block location and memory size to be used. + * + * @param data The raw data point to copy the data from, without modifying the pointer. + * @param size The number of data elements provided in the data + * @param memorySize The memory size of each of the data elements in bytes. 
+ */ void setPushConstants(void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = memorySize * size; @@ -1285,7 +1293,7 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. * - * @returns The kp::Constants currently set for specialization constants + * @returns The std::vector currently set for specialization constants */ template const std::vector getSpecializationConstants() @@ -1296,7 +1304,7 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. * - * @returns The kp::Constants currently set for push constants + * @returns The std::vector currently set for push constants */ template const std::vector getPushConstants() @@ -2206,6 +2214,20 @@ class Manager return tensor; } + /** + * Default non-template function that can be used to create algorithm objects + * which provides default types to the push and spec constants as floats. + * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) float vector to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) float vector to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ std::shared_ptr algorithm( const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -2224,9 +2246,9 @@ class Manager * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for + * @param specializationConstants (optional) templatable vector parameter to use for * specialization constants, and 
defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, + * @param pushConstants (optional) templatable vector parameter to use for push constants, * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index cd9d913f0..be17a2d09 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -64,9 +64,9 @@ class Algorithm * @param spirv The spirv code to use to create the algorithm * @param workgroup (optional) The kp::Workgroup to use for the dispatch * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to + * @param specializationConstants (optional) The std::vector to use to * initialize the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when + * @param pushConstants (optional) The std::vector to use when * initializing the pipeline, which set the size of the push constants - * these can be modified but all new values must have the same vector size * as this initial value. @@ -230,7 +230,7 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. * - * @returns The kp::Constants currently set for specialization constants + * @returns The std::vector currently set for specialization constants */ template const std::vector getSpecializationConstants() @@ -241,7 +241,7 @@ class Algorithm /** * Gets the specialization constants of the current algorithm. 
* - * @returns The kp::Constants currently set for push constants + * @returns The std::vector currently set for push constants */ template const std::vector getPushConstants() diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index c1818ec27..9b736213f 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -48,7 +48,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) test_shaders_glsl_test_logistic_regression_comp_spv_len)); std::shared_ptr algorithm = mgr.algorithm( - params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); + params, spirv, kp::Workgroup({ 5 }), std::vector({ 5.0 })); std::shared_ptr sq = mgr.sequence() @@ -127,7 +127,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algorithm = - mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 })); + mgr.algorithm(params, spirv, kp::Workgroup(), std::vector({ 5.0 })); std::shared_ptr sq = mgr.sequence() diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 1fe6a6664..40d190c62 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -49,9 +49,9 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) }; kp::Workgroup workgroup({ 3, 1, 1 }); - kp::Constants specConsts({ 2 }); - kp::Constants pushConstsA({ 2.0 }); - kp::Constants pushConstsB({ 3.0 }); + std::vector specConsts({ 2 }); + std::vector pushConstsA({ 2.0 }); + std::vector pushConstsB({ 3.0 }); auto algorithm = mgr.algorithm(params, compileSource(shader), @@ -263,8 +263,8 @@ TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions) }; kp::Workgroup workgroup({ 3, 1, 1 }); - kp::Constants specConsts({ 2 }); - kp::Constants pushConsts({ 2.0 }); + std::vector specConsts({ 2 }); + std::vector pushConsts({ 2.0 }); auto algorithm = mgr.algorithm(params, compileSource(shader), diff --git 
a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index c885f1d87..4742cd187 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -44,11 +44,11 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) // We need to run this in sequence to avoid race condition // We can't use atomicAdd as swiftshader doesn't support it for // float - sq->eval(algo, kp::Constants{ 0.1, 0.2, 0.3 }); - sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); + sq->eval(algo, std::vector{ 0.1, 0.2, 0.3 }); + sq->eval(algo, std::vector{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), std::vector({ 0.4, 0.4, 0.4 })); } } } @@ -90,10 +90,10 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) // We can't use atomicAdd as swiftshader doesn't support it for // float sq->eval(algo); - sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); + sq->eval(algo, std::vector{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); - EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 })); + EXPECT_EQ(tensor->vector(), std::vector({ 0.4, 0.4, 0.4 })); } } } @@ -132,7 +132,7 @@ TEST(TestPushConstants, TestConstantsWrongSize) sq = mgr.sequence()->record({ tensor }); EXPECT_THROW(sq->record( - algo, kp::Constants{ 0.1, 0.2, 0.3 }), + algo, std::vector{ 0.1, 0.2, 0.3 }), std::runtime_error); } } diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index a1dd5587d..abc507e7e 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -37,7 +37,7 @@ TEST(TestSpecializationConstants, TestTwoConstants) std::vector> params = { tensorA, tensorB }; - kp::Constants spec = kp::Constants({ 5.0, 0.3 }); + std::vector spec = std::vector({ 5.0, 0.3 }); std::shared_ptr algo = mgr.algorithm(params, spirv, {}, spec); From c758ec0c202d9856be2863e12f00f9a3598ccd91 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 12 Sep 2021 16:56:44 +0100 
Subject: [PATCH 19/19] Updated algo python Signed-off-by: Alejandro Saucedo --- python/src/main.cpp | 2 +- python/test/test_kompute.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index f2bb13090..d0447fe8e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -220,7 +220,7 @@ PYBIND11_MODULE(kp, m) { py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = std::vector(), py::arg("push_consts") = std::vector()) - .def("algorithm_t", [np](kp::Manager& self, + .def("algorithm", [np](kp::Manager& self, const std::vector>& tensors, const py::bytes& spirv, const kp::Workgroup& workgroup, diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 385933f26..8660b0151 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -229,7 +229,7 @@ def test_pushconsts_int(): spec_consts = np.array([], dtype=np.int32) push_consts = np.array([-1, -1, -1], dtype=np.int32) - algo = mgr.algorithm_t([tensor], spirv, (1, 1, 1), spec_consts, push_consts) + algo = mgr.algorithm([tensor], spirv, (1, 1, 1), spec_consts, push_consts) (mgr.sequence() .record(kp.OpTensorSyncDevice([tensor]))