From b0d394a50b6f7f633f41073d75d4774b0bb4fe99 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 1 Nov 2020 16:29:27 +0000 Subject: [PATCH] Updated single include with non-templated opalgobase classes --- single_include/kompute/Kompute.hpp | 374 +++-------------------------- 1 file changed, 31 insertions(+), 343 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 8def06e4a..382b7131d 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1620,20 +1620,17 @@ namespace kp { * Operation that provides a general abstraction that simplifies the use of * algorithm and parameter components which can be used with shaders. * By default it enables the user to provide a dynamic number of tensors - * which are then passed as inputs. - * - * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function. - * - * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters. - * - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * which are then passed as inputs. */ -template class OpAlgoBase : public OpBase { public: + struct KomputeWorkgroup { + uint32_t x; + uint32_t y; + uint32_t z; + }; + /** * Base constructor, should not be used unless explicitly intended. */ @@ -1649,11 +1646,13 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables a file to be passed to the operation with @@ -1664,13 +1663,15 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - std::string shaderFilePath); + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables raw shader data to be passed to the main operation @@ -1681,12 +1682,14 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - const std::vector& shaderDataRaw); + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -1733,9 +1736,7 @@ class OpAlgoBase : public OpBase // -------------- ALWAYS OWNED RESOURCES - uint32_t mX; - uint32_t mY; - uint32_t mZ; + KomputeWorkgroup mKomputeWorkgroup; std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content @@ -1745,177 +1746,6 @@ class OpAlgoBase : public OpBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOBASE_IMPL -#define OPALGOBASE_IMPL - -namespace kp { - -template -OpAlgoBase::OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor base"); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); - - // The dispatch size is set up based on either explicitly provided template - // parameters or by default it would take the shape and size of the tensors - if (tX > 0) { - // If at least the x value is provided we use mainly the parameters - // provided - this->mX = tX; - this->mY = tY > 0 ? tY : 1; - this->mZ = tZ > 0 ? tZ : 1; - } else { - this->mX = tensors[0]->size(); - this->mY = 1; - this->mZ = 1; - } - SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mX, - this->mY, - this->mZ); - - this->mAlgorithm = std::make_shared(device, commandBuffer); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); - - this->mShaderFilePath = shaderFilePath; -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); - - this->mShaderDataRaw = shaderDataRaw; -} - -template -OpAlgoBase::~OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); -} - -template -void -OpAlgoBase::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpAlgoBase called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if(!tensor->isInit()) { - throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); - } - } - - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoBase::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase record called"); - - // Barrier to ensure the data is finished writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - } - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); -} - -template -void -OpAlgoBase::preEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase preEval called"); -} - -template -void -OpAlgoBase::postEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); -} - -template -std::vector OpAlgoBase::fetchSpirvBinaryData() -{ - SPDLOG_WARN( - "Kompute OpAlgoBase Running shaders directly from spirv file"); - - if (this->mShaderFilePath.size()) { - std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | std::ios::ate); - - if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + this->mShaderFilePath); - } - - size_t shaderFileSize = fileStream.tellg(); - fileStream.seekg(0, std::ios::beg); - char* shaderDataRaw = new char[shaderFileSize]; - fileStream.read(shaderDataRaw, shaderFileSize); - fileStream.close(); - - SPDLOG_WARN( - "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - - return std::vector(shaderDataRaw, - shaderDataRaw + shaderFileSize); - } - else if (this->mShaderDataRaw.size()) { - return this->mShaderDataRaw; - } - else { - throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); - } -} - -} - -#endif // #ifndef OPALGOBASE_IMPL - #include namespace kp { @@ -1924,12 +1754,8 @@ namespace kp { * Operation base class to simplify the creation of operations that require * right hand and left hand side datapoints together with a single output. * The expected data passed is two input tensors and one output tensor. - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ -template -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoBase { public: /** @@ -1947,11 +1773,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors); + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -1982,7 +1810,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * of the GPU Device memory into the staging buffer so the output data can * be retrieved. */ - virtual void postSubmit() override; + virtual void postEval() override; protected: // -------------- NEVER OWNED RESOURCES @@ -1996,138 +1824,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOLHSRHSOUT_CPP -#define OPALGOLHSRHSOUT_CPP - -namespace kp { - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); -} - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in - // a granular way. - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); -} - -template -OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); -} - -template -void -OpAlgoLhsRhsOut::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - this->mTensorOutputStaging = std::make_shared( - this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoLhsRhsOut::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - this->mTensorOutputStaging->recordCopyFrom( - this->mCommandBuffer, - this->mTensorOutput, - true); -} - -template -void -OpAlgoLhsRhsOut::postSubmit() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); - - this->mTensorOutputStaging->mapDataFromHostMemory(); - - this->mTensorOutput->setData(this->mTensorOutputStaging->data()); -} - -} - -#endif // #ifndef OPALGOLHSRHSOUT_CPP - #include #if RELEASE @@ -2138,12 +1834,9 @@ namespace kp { /** * Operation that performs multiplication on two tensors and outpus on third - * tensor. The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * tensor. */ -template -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoBase { public: /** @@ -2162,13 +1855,14 @@ class OpMult : public OpAlgoBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -2179,14 +1873,8 @@ class OpMult : public OpAlgoBase #if RELEASE /** - * If release it will be using the static version of the shader which is - * loaded using this file directly. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * If RELEASE=1 it will be using the static version of the shader which is + * loaded using this file directly. Otherwise it should not override the function. */ std::vector fetchSpirvBinaryData() override {