From 0b84876c95dc37d1349d99d2580fa025c10660d6 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 13 Feb 2021 19:38:02 +0000 Subject: [PATCH] Extended algorithm to add spec consts for int and float --- src/Algorithm.cpp | 26 ++-- src/OpAlgoBase.cpp | 15 ++- src/include/kompute/Algorithm.hpp | 121 +++++++++++++++++- src/include/kompute/operations/OpAlgoBase.hpp | 9 +- 4 files changed, 145 insertions(+), 26 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index eb0be22a8..b73391386 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -10,12 +10,14 @@ Algorithm::Algorithm() } Algorithm::Algorithm(std::shared_ptr device, - std::shared_ptr commandBuffer) + std::shared_ptr commandBuffer, + const SpecializationContainer& specializationConstants) { SPDLOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; this->mCommandBuffer = commandBuffer; + this->mSpecializationConstants = specializationConstants; } Algorithm::~Algorithm() @@ -114,11 +116,7 @@ Algorithm::init(const std::vector& shaderFileData, this->createParameters(tensorParams); this->createShaderModule(shaderFileData); - std::vector sizes; - for (std::shared_ptr tensor : tensorParams) { - sizes.push_back(tensor->size()); - } - this->createPipeline(sizes); + this->createPipeline(); } void @@ -225,7 +223,7 @@ Algorithm::createShaderModule(const std::vector& shaderFileData) } void -Algorithm::createPipeline(std::vector specializationData) +Algorithm::createPipeline() { SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline"); @@ -241,20 +239,22 @@ Algorithm::createPipeline(std::vector specializationData) std::vector specializationEntries; - for (size_t i = 0; i < specializationData.size(); i++) { + for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) { vk::SpecializationMapEntry specializationEntry( - static_cast(i), - static_cast(sizeof(uint32_t) * i), - sizeof(uint32_t)); + static_cast(i), + static_cast(this->mSpecializationConstants.instanceMemorySize() * i), + this->mSpecializationConstants.instanceMemorySize()); specializationEntries.push_back(specializationEntry); } + // This passes ownership of the memory so we remove ownership from + // specialization container by using "transferDataOwnership" vk::SpecializationInfo specializationInfo( static_cast(specializationEntries.size()), specializationEntries.data(), - sizeof(uint32_t) * specializationEntries.size(), - specializationData.data()); + this->mSpecializationConstants.totalMemorySize(), + this->mSpecializationConstants.transferDataOwnership()); vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp index ad4bbc17b..b8755af35 100644 --- a/src/OpAlgoBase.cpp +++ b/src/OpAlgoBase.cpp @@ -13,7 +13,8 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - KomputeWorkgroup komputeWorkgroup) + KomputeWorkgroup komputeWorkgroup, + const Algorithm::SpecializationContainer& specializationConstants) : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", @@ -37,7 +38,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, this->mKomputeWorkgroup.y, this->mKomputeWorkgroup.z); - this->mAlgorithm = std::make_shared(device, commandBuffer); + this->mAlgorithm = std::make_shared(device, commandBuffer, specializationConstants); } OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, @@ -45,8 +46,9 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr commandBuffer, std::vector>& tensors, std::string shaderFilePath, - KomputeWorkgroup komputeWorkgroup) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) + KomputeWorkgroup komputeWorkgroup, + const Algorithm::SpecializationContainer& specializationConstants) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants) { SPDLOG_DEBUG( "Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", @@ -60,8 +62,9 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr commandBuffer, std::vector>& tensors, const std::vector& shaderDataRaw, - KomputeWorkgroup komputeWorkgroup) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) + KomputeWorkgroup komputeWorkgroup, + const Algorithm::SpecializationContainer& specializationConstants) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants) { SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw " "data length: {}", diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 08f2832e7..dddebccdf 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -12,7 +12,115 @@ namespace kp { */ class Algorithm { - public: +public: + // TODO: Move as internal struct of speccontainer + class SpecializationConstant { + public: + SpecializationConstant(const SpecializationConstant& specializationConstant) { + SPDLOG_DEBUG("Kompute SpecializationConstant copy constructor: {}", *((uint32_t*)specializationConstant.mInstanceData)); + this->mInstanceData = (char*)malloc(sizeof(uint32_t)); + memcpy(this->mInstanceData, specializationConstant.mInstanceData, sizeof(uint32_t)); + } + // This class is required in absence of std::variant to ensure C++11 support + SpecializationConstant(uint32_t val) { + SPDLOG_DEBUG("Kompute SpecializationConstant uint32_t constructor: {}", val); + this->mInstanceData = (char*)malloc(sizeof(uint32_t)); + memcpy(this->mInstanceData, &val, sizeof(uint32_t)); + } + SpecializationConstant(float val) { + SPDLOG_DEBUG("Kompute SpecializationConstant float constructor: {}", val); + this->mInstanceData = (char*)malloc(sizeof(uint32_t)); + memcpy(this->mInstanceData, &val, sizeof(uint32_t)); + } + ~SpecializationConstant() { + free(this->mInstanceData); + } + void *data() { + return this->mInstanceData; + } + private: + // We use char pointer to enable for pointer arithmetic + char *mInstanceData = nullptr; + }; + + class SpecializationContainer { + public: + SpecializationContainer() { + SPDLOG_DEBUG("Kompute SpecializationContainer default initialiser"); + this->mFreeData = false; + } + + SpecializationContainer(const SpecializationContainer& specializationContainer) + { + SPDLOG_DEBUG("Kompute SpecializationContainer copy constructor, size: {}", specializationContainer.mSpecializationConstants.size()); + SpecializationContainer(specializationContainer.mSpecializationConstants); + } + + SpecializationContainer(std::vector instances) { + SPDLOG_DEBUG("Kompute SpecializationContainer initialiser with instances size {}", instances.size()); + + static_assert(sizeof(uint32_t) == sizeof(float) && sizeof(uint32_t) == sizeof(char) * 4, + "Kompute requires uint32_t and float to be of same size. Please report this to github."); + + // totalMemorySize depends on instances being set so this needs to be set before + this->mSpecializationConstants = instances; + + // Data has then to be allocated in order to copy memory into it + this->mData = (char*)malloc(this->totalMemorySize()); + + this->mFreeData = true; + + for (size_t i = 0; i < this->size(); i++) { + + memcpy(this->mData + (i * sizeof(uint32_t)), instances[i].data(), sizeof(uint32_t)); + } + } + + ~SpecializationContainer() { + SPDLOG_DEBUG("Kompute SpecializationContainer destructor started"); + + this->mSpecializationConstants.clear(); + + if (this->mFreeData) { + SPDLOG_DEBUG("Kompute SpecializationContainer freeing data"); + this->mFreeData = false; + free(this->mData); + } else { + SPDLOG_DEBUG("Kompute SpecializationContainer no data was freed"); + } + + SPDLOG_DEBUG("kompute SpecializationContainer freed data"); + } + + void *transferDataOwnership() { + SPDLOG_DEBUG("Kompute SpecializationContainer data transfer ownership requested"); + this->mFreeData = false; + return (void*)this->mData; + } + + uint32_t size() { + return this->mSpecializationConstants.size(); + } + + uint32_t totalMemorySize() { + return this->instanceMemorySize() * this->size(); + } + + uint32_t instanceMemorySize() { + // At this point only variables accepted are uint32_t and float which are same size + return sizeof(uint32_t); + } + + private: + + std::vector mSpecializationConstants; + bool mFreeData = false; + // We use char pointer to enable for pointer arithmetic + char *mData = nullptr; + }; +private: + // Private struct template which is then +public: /** Base constructor for Algorithm. Should not be used unless explicit intended. @@ -27,7 +135,8 @@ class Algorithm * shaders */ Algorithm(std::shared_ptr device, - std::shared_ptr commandBuffer); + std::shared_ptr commandBuffer, + const SpecializationContainer& specializationConstants = {}); /** * Initialiser for the shader data provided to the algorithm as well as @@ -35,6 +144,7 @@ class Algorithm * * @param shaderFileData The bytes in spir-v format of the shader * @tensorParams The Tensors to be used in the Algorithm / shader for + * @specalizationInstalces The specialization parameters to pass to the function * processing */ void init(const std::vector& shaderFileData, @@ -56,7 +166,7 @@ class Algorithm */ void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1); - private: +private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; @@ -77,9 +187,12 @@ class Algorithm std::shared_ptr mPipeline; bool mFreePipeline = false; + // -------------- ALWAYS OWNED RESOURCES + SpecializationContainer mSpecializationConstants; + // Create util functions void createShaderModule(const std::vector& shaderFileData); - void createPipeline(std::vector specializationData = {}); + void createPipeline(); // Parameters void createParameters(std::vector>& tensorParams); diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp index 74108d285..484f91a88 100644 --- a/src/include/kompute/operations/OpAlgoBase.hpp +++ b/src/include/kompute/operations/OpAlgoBase.hpp @@ -49,7 +49,8 @@ class OpAlgoBase : public OpBase std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); + KomputeWorkgroup komputeWorkgroup = {}, + const Algorithm::SpecializationContainer& specializationConstants = {}); /** * Constructor that enables a file to be passed to the operation with @@ -68,7 +69,8 @@ class OpAlgoBase : public OpBase std::shared_ptr commandBuffer, std::vector>& tensors, std::string shaderFilePath, - KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); + KomputeWorkgroup komputeWorkgroup = {}, + const Algorithm::SpecializationContainer& specializationConstants = {}); /** * Constructor that enables raw shader data to be passed to the main operation @@ -86,7 +88,8 @@ class OpAlgoBase : public OpBase std::shared_ptr commandBuffer, std::vector>& tensors, const std::vector& shaderDataRaw, - KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); + KomputeWorkgroup komputeWorkgroup = {}, + const Algorithm::SpecializationContainer& specializationConstants = {}); /** * Default destructor, which is in charge of destroying the algorithm