diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp
new file mode 100644
index 000000000..99e3a9ac1
--- /dev/null
+++ b/src/OpAlgoBase.cpp
@@ -0,0 +1,167 @@
+#include "kompute/operations/OpAlgoBase.hpp"
+
+namespace kp {
+
+OpAlgoBase::OpAlgoBase()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
+}
+
+OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice,
+                       std::shared_ptr device,
+                       std::shared_ptr commandBuffer,
+                       std::vector>& tensors,
+                       KomputeWorkgroup komputeWorkgroup)
+  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size());
+
+    // The dispatch size is set up based on either the explicitly provided
+    // workgroup or, by default, on the size of the first tensor
+    if (komputeWorkgroup.x > 0) {
+        // If at least the x value is provided we use the parameters
+        // provided, defaulting any unset y / z dimension to 1
+        this->mKomputeWorkgroup = {
+            komputeWorkgroup.x,
+            komputeWorkgroup.y > 0 ? komputeWorkgroup.y : 1,
+            komputeWorkgroup.z > 0 ? komputeWorkgroup.z : 1
+        };
+    } else {
+        // Without an explicit workgroup the first tensor drives the dispatch
+        // size, so fail loudly instead of indexing into an empty vector
+        if (tensors.empty()) {
+            throw std::runtime_error(
+              "Kompute OpAlgoBase called with less than 1 tensor");
+        }
+        this->mKomputeWorkgroup = {tensors[0]->size(), 1, 1};
+    }
+    SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
+                this->mKomputeWorkgroup.x,
+                this->mKomputeWorkgroup.y,
+                this->mKomputeWorkgroup.z);
+
+    this->mAlgorithm = std::make_shared(device, commandBuffer);
+}
+
+OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice,
+                       std::shared_ptr device,
+                       std::shared_ptr commandBuffer,
+                       std::vector>& tensors,
+                       std::string shaderFilePath,
+                       KomputeWorkgroup komputeWorkgroup)
+  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath);
+
+    this->mShaderFilePath = shaderFilePath;
+}
+
+OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice,
+                       std::shared_ptr device,
+                       std::shared_ptr commandBuffer,
+                       std::vector>& tensors,
+                       const std::vector& shaderDataRaw,
+                       KomputeWorkgroup komputeWorkgroup)
+  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size());
+
+    this->mShaderDataRaw = shaderDataRaw;
+}
+
+OpAlgoBase::~OpAlgoBase()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
+}
+
+void
+OpAlgoBase::init()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase init called");
+
+    if (this->mTensors.size() < 1) {
+        throw std::runtime_error(
+          "Kompute OpAlgoBase called with less than 1 tensor");
+    }
+
+    for (std::shared_ptr tensor : this->mTensors) {
+        if(!tensor->isInit()) {
+            throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised.");
+        }
+    }
+
+    SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data");
+
+    std::vector shaderFileData = this->fetchSpirvBinaryData();
+
+    SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");
+
+    this->mAlgorithm->init(shaderFileData, this->mTensors);
+}
+
+void
+OpAlgoBase::record()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase record called");
+
+    // Barrier to ensure the data is finished writing to buffer memory
+    for (std::shared_ptr tensor : this->mTensors) {
+        tensor->recordBufferMemoryBarrier(
+          this->mCommandBuffer,
+          vk::AccessFlagBits::eHostWrite,
+          vk::AccessFlagBits::eShaderRead,
+          vk::PipelineStageFlagBits::eHost,
+          vk::PipelineStageFlagBits::eComputeShader);
+    }
+
+    this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x, this->mKomputeWorkgroup.y, this->mKomputeWorkgroup.z);
+}
+
+void
+OpAlgoBase::preEval()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
+}
+
+void
+OpAlgoBase::postEval()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase postEval called");
+}
+
+std::vector OpAlgoBase::fetchSpirvBinaryData()
+{
+    SPDLOG_WARN(
+      "Kompute OpAlgoBase Running shaders directly from spirv file");
+
+    if (this->mShaderFilePath.size()) {
+        std::ifstream fileStream(this->mShaderFilePath,
+                                 std::ios::binary | std::ios::in | std::ios::ate);
+
+        if (!fileStream.good()) {
+            throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
+        }
+
+        size_t shaderFileSize = fileStream.tellg();
+        fileStream.seekg(0, std::ios::beg);
+        // Read straight into the vector that is returned so that no manually
+        // allocated buffer is left behind (also if an exception is thrown)
+        std::vector<char> shaderData(shaderFileSize);
+        fileStream.read(shaderData.data(), shaderFileSize);
+        fileStream.close();
+
+        SPDLOG_WARN(
+          "Kompute OpAlgoBase fetched {} bytes", shaderFileSize);
+
+        return shaderData;
+    }
+    else if (this->mShaderDataRaw.size()) {
+        return this->mShaderDataRaw;
+    }
+    else {
+        throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided");
+    }
+}
+
+}
+
diff --git a/src/OpAlgoLhsRhsOut.cpp b/src/OpAlgoLhsRhsOut.cpp
new file mode 100644
index 000000000..444ec63a3
--- /dev/null
+++ b/src/OpAlgoLhsRhsOut.cpp
@@ -0,0 +1,127 @@
+#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
+
+namespace kp {
+
+OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
+{
+
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
+}
+
+OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice,
+                                 std::shared_ptr device,
+                                 std::shared_ptr commandBuffer,
+                                 std::vector> tensors,
+                                 KomputeWorkgroup komputeWorkgroup)
+  // The inheritance is initialised with the copyOutputData to false given that
+  // this dependent class handles the transfer of data via staging buffers in
+  // a granular way.
+  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
+}
+
+OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
+}
+
+void
+OpAlgoLhsRhsOut::init()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
+
+    if (this->mTensors.size() < 3) {
+        throw std::runtime_error(
+          "Kompute OpAlgoLhsRhsOut called with less than 3 tensors");
+    } else if (this->mTensors.size() > 3) {
+        SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 tensors");
+    }
+
+    this->mTensorLHS = this->mTensors[0];
+    this->mTensorRHS = this->mTensors[1];
+    this->mTensorOutput = this->mTensors[2];
+
+
+    if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
+          this->mTensorOutput->isInit())) {
+        throw std::runtime_error(
+          "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
+          std::to_string(this->mTensorLHS->isInit()) +
+          " RHS: " + std::to_string(this->mTensorRHS->isInit()) +
+          " Output: " + std::to_string(this->mTensorOutput->isInit()));
+    }
+
+    if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
+          this->mTensorRHS->size() == this->mTensorOutput->size())) {
+        throw std::runtime_error(
+          "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
+          std::to_string(this->mTensorLHS->size()) +
+          " RHS: " + std::to_string(this->mTensorRHS->size()) +
+          " Output: " + std::to_string(this->mTensorOutput->size()));
+    }
+
+    this->mTensorOutputStaging = std::make_shared(
+      this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
+
+    this->mTensorOutputStaging->init(
+      this->mPhysicalDevice, this->mDevice);
+
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
+
+    std::vector shaderFileData = this->fetchSpirvBinaryData();
+
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
+
+    this->mAlgorithm->init(shaderFileData, this->mTensors);
+}
+
+void
+OpAlgoLhsRhsOut::record()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
+
+    // Barrier to ensure the data is finished writing to buffer memory
+    this->mTensorLHS->recordBufferMemoryBarrier(
+      this->mCommandBuffer,
+      vk::AccessFlagBits::eHostWrite,
+      vk::AccessFlagBits::eShaderRead,
+      vk::PipelineStageFlagBits::eHost,
+      vk::PipelineStageFlagBits::eComputeShader);
+    this->mTensorRHS->recordBufferMemoryBarrier(
+      this->mCommandBuffer,
+      vk::AccessFlagBits::eHostWrite,
+      vk::AccessFlagBits::eShaderRead,
+      vk::PipelineStageFlagBits::eHost,
+      vk::PipelineStageFlagBits::eComputeShader);
+
+    this->mAlgorithm->recordDispatch(
+      this->mKomputeWorkgroup.x,
+      this->mKomputeWorkgroup.y,
+      this->mKomputeWorkgroup.z);
+
+    // Barrier to ensure the shader code is executed before buffer read
+    this->mTensorOutput->recordBufferMemoryBarrier(
+      this->mCommandBuffer,
+      vk::AccessFlagBits::eShaderWrite,
+      vk::AccessFlagBits::eTransferRead,
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eTransfer);
+
+    this->mTensorOutputStaging->recordCopyFrom(
+      this->mCommandBuffer,
+      this->mTensorOutput,
+      true);
+}
+
+void
+OpAlgoLhsRhsOut::postEval()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postEval called");
+
+    this->mTensorOutputStaging->mapDataFromHostMemory();
+
+    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
+}
+
+}
+
diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp
index 653006952..74108d285 100644
--- a/src/include/kompute/operations/OpAlgoBase.hpp
+++ b/src/include/kompute/operations/OpAlgoBase.hpp
@@ -17,20 +17,17 @@ namespace kp {
  * Operation that provides a general abstraction that simplifies the use of
  * algorithm and parameter components which can be used with shaders.
  * By default it enables the user to provide a dynamic number of tensors
- * which are then passed as inputs.
- *
- * All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function.
- *
- * See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters.
- *
- * The template parameters specify the processing GPU layout number of
- * iterations for each x, y, z parameter. More specifically, this will be the
- * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
+ * which are then passed as inputs.
  */
-template
 class OpAlgoBase : public OpBase
 {
   public:
+    struct KomputeWorkgroup {
+        uint32_t x;
+        uint32_t y;
+        uint32_t z;
+    };
+
     /**
      * Base constructor, should not be used unless explicitly intended.
*/ @@ -46,11 +43,13 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector>& tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables a file to be passed to the operation with @@ -61,13 +60,15 @@ class OpAlgoBase : public OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, - std::string shaderFilePath); + std::string shaderFilePath, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Constructor that enables raw shader data to be passed to the main operation @@ -78,12 +79,14 @@ class OpAlgoBase : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, 
std::vector>& tensors, - const std::vector& shaderDataRaw); + const std::vector& shaderDataRaw, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -131,9 +134,7 @@ class OpAlgoBase : public OpBase // -------------- ALWAYS OWNED RESOURCES - uint32_t mX; - uint32_t mY; - uint32_t mZ; + KomputeWorkgroup mKomputeWorkgroup; std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content @@ -143,174 +144,3 @@ class OpAlgoBase : public OpBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOBASE_IMPL -#define OPALGOBASE_IMPL - -namespace kp { - -template -OpAlgoBase::OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor base"); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); - - // The dispatch size is set up based on either explicitly provided template - // parameters or by default it would take the shape and size of the tensors - if (tX > 0) { - // If at least the x value is provided we use mainly the parameters - // provided - this->mX = tX; - this->mY = tY > 0 ? tY : 1; - this->mZ = tZ > 0 ? 
tZ : 1; - } else { - this->mX = tensors[0]->size(); - this->mY = 1; - this->mZ = 1; - } - SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mX, - this->mY, - this->mZ); - - this->mAlgorithm = std::make_shared(device, commandBuffer); -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath); - - this->mShaderFilePath = shaderFilePath; -} - -template -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size()); - - this->mShaderDataRaw = shaderDataRaw; -} - -template -OpAlgoBase::~OpAlgoBase() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase destructor started"); -} - -template -void -OpAlgoBase::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpAlgoBase called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if(!tensor->isInit()) { - throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised."); - } - } - - SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoBase::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase record called"); - - // Barrier to ensure the data is finished 
writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - } - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); -} - -template -void -OpAlgoBase::preEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase preEval called"); -} - -template -void -OpAlgoBase::postEval() -{ - SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called"); -} - -template -std::vector OpAlgoBase::fetchSpirvBinaryData() -{ - SPDLOG_WARN( - "Kompute OpAlgoBase Running shaders directly from spirv file"); - - if (this->mShaderFilePath.size()) { - std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | std::ios::ate); - - if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + this->mShaderFilePath); - } - - size_t shaderFileSize = fileStream.tellg(); - fileStream.seekg(0, std::ios::beg); - char* shaderDataRaw = new char[shaderFileSize]; - fileStream.read(shaderDataRaw, shaderFileSize); - fileStream.close(); - - SPDLOG_WARN( - "Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - - return std::vector(shaderDataRaw, - shaderDataRaw + shaderFileSize); - } - else if (this->mShaderDataRaw.size()) { - return this->mShaderDataRaw; - } - else { - throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided"); - } -} - -} - -#endif // #ifndef OPALGOBASE_IMPL - diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp index 5c22bdcc6..c826bd324 100644 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -15,12 +15,8 @@ namespace kp { * Operation base class to simplify the creation of operations that require * right hand and left hand side datapoints 
together with a single output. * The expected data passed is two input tensors and one output tensor. - * The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ -template -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoBase { public: /** @@ -38,11 +34,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors); + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()); /** * Default destructor, which is in charge of destroying the algorithm @@ -73,7 +71,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * of the GPU Device memory into the staging buffer so the output data can * be retrieved. 
*/ - virtual void postSubmit() override; + virtual void postEval() override; protected: // -------------- NEVER OWNED RESOURCES @@ -87,136 +85,3 @@ class OpAlgoLhsRhsOut : public OpAlgoBase } // End namespace kp -// Including implementation for template class -#ifndef OPALGOLHSRHSOUT_CPP -#define OPALGOLHSRHSOUT_CPP - -namespace kp { - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); -} - -template -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in - // a granular way. - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); -} - -template -OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); -} - -template -void -OpAlgoLhsRhsOut::init() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. 
LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - this->mTensorOutputStaging = std::make_shared( - this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpAlgoLhsRhsOut::record() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - 
vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - this->mTensorOutputStaging->recordCopyFrom( - this->mCommandBuffer, - this->mTensorOutput, - true); -} - -template -void -OpAlgoLhsRhsOut::postSubmit() -{ - SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); - - this->mTensorOutputStaging->mapDataFromHostMemory(); - - this->mTensorOutput->setData(this->mTensorOutputStaging->data()); -} - -} - -#endif // #ifndef OPALGOLHSRHSOUT_CPP - diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index ba3cb21a0..f555f8ac1 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -17,12 +17,9 @@ namespace kp { /** * Operation that performs multiplication on two tensors and outpus on third - * tensor. The template parameters specify the processing GPU layout number of - * iterations for each x, y, z parameter. More specifically, this will be the - * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + * tensor. 
*/ -template -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoBase { public: /** @@ -41,13 +38,14 @@ class OpMult : public OpAlgoBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector> tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "") + std::vector> tensors, + KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup()) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -58,14 +56,8 @@ class OpMult : public OpAlgoBase #if RELEASE /** - * If release it will be using the static version of the shader which is - * loaded using this file directly. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors + * If RELEASE=1 it will be using the static version of the shader which is + * loaded using this file directly. Otherwise it should not override the function. */ std::vector fetchSpirvBinaryData() override {