Created OpAlgoBase file to provide abstraction to files that implement algorithm classes

This commit is contained in:
Alejandro Saucedo 2020-08-29 15:37:34 +01:00
parent 68e46abc7a
commit a2efc441db
7 changed files with 429 additions and 127 deletions

View file

@ -15,7 +15,7 @@
<td>
<h1>Vulkan Kompute</h1>
<h3>The General Purpose Vulkan Compute Framework</h3>
<h3>The General Purpose Vulkan Compute Framework. Blazing fast, lightweight, easy to set up and optimized for advanced data processing usecases.</h3>
</td>

View file

@ -3,6 +3,7 @@
#include "kompute/Manager.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpCreateTensor.hpp"
#include "kompute/Algorithm.hpp"

View file

@ -281,15 +281,18 @@ class Tensor
void mapDataIntoHostMemory();
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Buffer> mBuffer;
bool mFreeBuffer;
std::shared_ptr<vk::DeviceMemory> mMemory;
bool mFreeMemory;
// -------------- ALWAYS OWNED RESOURCES
std::vector<uint32_t> mData;
TensorTypes mTensorType = TensorTypes::eDevice;
@ -297,8 +300,7 @@ class Tensor
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
bool mIsInit = false;
// Creates the vulkan buffer
void createBuffer();
void createBuffer(); // Creates the vulkan buffer
// Private util functions
vk::BufferUsageFlags getBufferUsageFlags();
@ -400,19 +402,19 @@ class OpBase
virtual void postSubmit() = 0;
protected:
// OPTIONALLY OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>>
mTensors; ///< Tensors referenced by operation that can be managed
///< optionally by operation
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
///< tensors are freed (if they are managed)
// NEVER OWNED RESOURCES
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice>
mPhysicalDevice; ///< Vulkan Physical Device
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
std::shared_ptr<vk::CommandBuffer>
mCommandBuffer; ///< Vulkan Command Buffer
// -------------- OPTIONALLY OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>>
mTensors; ///< Tensors referenced by operation that can be managed
///< optionally by operation
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
///< tensors are freed (if they are managed)
};
} // End namespace kp
@ -517,10 +519,13 @@ class Sequence
}
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
std::shared_ptr<vk::Device> mDevice = nullptr;
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
uint32_t mQueueIndex = -1;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
bool mFreeCommandPool = false;
std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
@ -618,6 +623,7 @@ class Manager
}
private:
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Instance> mInstance = nullptr;
bool mFreeInstance = false;
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
@ -627,7 +633,7 @@ class Manager
uint32_t mComputeQueueFamilyIndex = -1;
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
// Always owned resources
// -------------- ALWAYS OWNED RESOURCES
std::unordered_map<std::string, std::shared_ptr<Sequence>>
mManagedSequences;
@ -691,16 +697,15 @@ class Algorithm
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
private:
// Never Owned Resources
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::Device> mDevice;
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
// Optionally owned resources
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
bool mFreeDescriptorPool = false;
// TODO: Explore design for multiple descriptor sets
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
bool mFreeDescriptorSet = false;
@ -726,6 +731,195 @@ class Algorithm
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* The template parameters specify the processing GPU layout number of
* iterations for each x, y, z parameter. More specifically, this will be the
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpAlgoBase : public OpBase
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpAlgoBase();
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
* sub-components.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
~OpAlgoBase();
/**
* The init function is responsible for the initialisation of the algorithm
* component based on the parameters specified, and allows for extensibility
* on the options provided. Further dependent classes can perform more
* specific checks such as ensuring tensors provided are initialised, etc.
*/
virtual void init() override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
* out of the shader, as well as the dispatch operation that sends the
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging bufffer so it can be read by the
* host.
*/
virtual void record() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postSubmit() override;
protected:
// -------------- NEVER OWNED RESOURCES
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<Algorithm> mAlgorithm;
bool mFreeAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
uint32_t mX;
uint32_t mY;
uint32_t mZ;
std::string mOptSpirvBinPath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
virtual std::vector<char> fetchSpirvBinaryData();
};
} // End namespace kp
// Including implemenation for template class
#ifndef OPALGOBASE_IMPL
#define OPALGOBASE_IMPL
namespace kp {
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (tX > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mX = tX;
this->mY = tY > 0 ? tY : 1;
this->mZ = tZ > 0 ? tZ : 1;
} else {
// TODO: If tensor empty vector exception would be thrown
// TODO: Fully support the full size dispatch using size for the shape
this->mX = tensors[0]->size();
this->mY = 1;
this->mZ = 1;
}
spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
this->mX,
this->mY,
this->mZ);
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
{
SPDLOG_WARN(
"Kompute OpAlgoBase Running shaders directly from spirv file");
std::ifstream fileStream(this->mOptSpirvBinPath,
std::ios::binary | std::ios::in | std::ios::ate);
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderDataRaw = new char[shaderFileSize];
fileStream.read(shaderDataRaw, shaderFileSize);
fileStream.close();
return std::vector<char>(shaderDataRaw,
shaderDataRaw + shaderFileSize);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::init()
{
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::record()
{
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postSubmit()
{
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
}
}
#endif // #ifndef OPALGOBASE_IMPL
#include <fstream>
#if RELEASE
#endif
namespace kp {
/**
* Operation that performs multiplication on two tensors and outpus on third
* tensor. The template parameters specify the processing GPU layout number of
@ -733,7 +927,7 @@ namespace kp {
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpMult : public OpBase
class OpMult : public OpAlgoBase<tX, tY, tZ>
{
public:
/**
@ -755,8 +949,7 @@ class OpMult : public OpBase
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors = false);
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
@ -790,21 +983,13 @@ class OpMult : public OpBase
void postSubmit() override;
private:
// Always owned resources
std::shared_ptr<Tensor> mTensorOutputStaging;
// Optionally owned resources
std::shared_ptr<Algorithm> mAlgorithm;
bool mFreeAlgorithm = false;
// Never owned resources
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<Tensor> mTensorLHS;
std::shared_ptr<Tensor> mTensorRHS;
std::shared_ptr<Tensor> mTensorOutput;
uint32_t mX;
uint32_t mY;
uint32_t mZ;
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Tensor> mTensorOutputStaging;
};
} // End namespace kp
@ -825,13 +1010,10 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpMult constructor with params");
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
@ -857,25 +1039,6 @@ OpMult<tX, tY, tZ>::init()
this->mTensorRHS = this->mTensors[1];
this->mTensorOutput = this->mTensors[2];
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (tX > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mX = tX;
this->mY = tY > 0 ? tY : 1;
this->mZ = tZ > 0 ? tZ : 1;
} else {
// TODO: Fully support the full size dispatch using size for the shape
this->mX = this->mTensorLHS->size();
this->mY = 1;
this->mZ = 1;
}
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
this->mX,
this->mY,
this->mZ);
// TODO: Explore adding a validate function
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
this->mTensorOutput->isInit())) {
@ -909,25 +1072,12 @@ OpMult<tX, tY, tZ>::init()
shader_data::shaders_glsl_opmult_comp_spv +
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
#else
SPDLOG_DEBUG(
"Kompute OpMult Running debug loading shaders directly from spirv file");
// TODO: Move to utility function
std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
std::ifstream fileStream(shaderFilePath,
std::ios::binary | std::ios::in | std::ios::ate);
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderDataRaw = new char[shaderFileSize];
fileStream.read(shaderDataRaw, shaderFileSize);
fileStream.close();
std::vector<char> shaderFileData(shaderDataRaw,
shaderDataRaw + shaderFileSize);
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
#endif
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
SPDLOG_DEBUG("Kompute vector size {}", shaderFileData.size());
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
@ -1019,8 +1169,7 @@ class OpCreateTensor : public OpBase
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors = true);
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor which in this case expects the parent class to free

View file

@ -14,9 +14,8 @@ OpCreateTensor::OpCreateTensor(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, true)
{
SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params");
}

View file

@ -0,0 +1,196 @@
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "kompute/shaders/shaderopmult.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* The template parameters specify the processing GPU layout number of
* iterations for each x, y, z parameter. More specifically, this will be the
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpAlgoBase : public OpBase
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpAlgoBase();
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
* sub-components.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
~OpAlgoBase();
/**
* The init function is responsible for the initialisation of the algorithm
* component based on the parameters specified, and allows for extensibility
* on the options provided. Further dependent classes can perform more
* specific checks such as ensuring tensors provided are initialised, etc.
*/
virtual void init() override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
* out of the shader, as well as the dispatch operation that sends the
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging bufffer so it can be read by the
* host.
*/
virtual void record() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postSubmit() override;
protected:
// -------------- NEVER OWNED RESOURCES
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<Algorithm> mAlgorithm;
bool mFreeAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
uint32_t mX;
uint32_t mY;
uint32_t mZ;
std::string mOptSpirvBinPath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
virtual std::vector<char> fetchSpirvBinaryData();
};
} // End namespace kp
// Including implemenation for template class
#ifndef OPALGOBASE_IMPL
#define OPALGOBASE_IMPL
namespace kp {
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (tX > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mX = tX;
this->mY = tY > 0 ? tY : 1;
this->mZ = tZ > 0 ? tZ : 1;
} else {
// TODO: If tensor empty vector exception would be thrown
// TODO: Fully support the full size dispatch using size for the shape
this->mX = tensors[0]->size();
this->mY = 1;
this->mZ = 1;
}
spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
this->mX,
this->mY,
this->mZ);
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
{
SPDLOG_WARN(
"Kompute OpAlgoBase Running shaders directly from spirv file");
std::ifstream fileStream(this->mOptSpirvBinPath,
std::ios::binary | std::ios::in | std::ios::ate);
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderDataRaw = new char[shaderFileSize];
fileStream.read(shaderDataRaw, shaderFileSize);
fileStream.close();
return std::vector<char>(shaderDataRaw,
shaderDataRaw + shaderFileSize);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::init()
{
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::record()
{
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postSubmit()
{
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
}
}
#endif // #ifndef OPALGOBASE_IMPL

View file

@ -31,8 +31,7 @@ class OpCreateTensor : public OpBase
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors = true);
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor which in this case expects the parent class to free

View file

@ -4,12 +4,14 @@
#include "kompute/Core.hpp"
#if RELEASE
#include "kompute/shaders/shaderopmult.hpp"
#endif
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/operations/OpAlgoBase.hpp"
namespace kp {
@ -20,7 +22,7 @@ namespace kp {
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpMult : public OpBase
class OpMult : public OpAlgoBase<tX, tY, tZ>
{
public:
/**
@ -42,8 +44,7 @@ class OpMult : public OpBase
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors = false);
std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
@ -82,16 +83,8 @@ class OpMult : public OpBase
std::shared_ptr<Tensor> mTensorRHS;
std::shared_ptr<Tensor> mTensorOutput;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<Algorithm> mAlgorithm;
bool mFreeAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Tensor> mTensorOutputStaging;
uint32_t mX;
uint32_t mY;
uint32_t mZ;
};
} // End namespace kp
@ -112,13 +105,10 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
bool freeTensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpMult constructor with params");
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
@ -144,24 +134,6 @@ OpMult<tX, tY, tZ>::init()
this->mTensorRHS = this->mTensors[1];
this->mTensorOutput = this->mTensors[2];
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (tX > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mX = tX;
this->mY = tY > 0 ? tY : 1;
this->mZ = tZ > 0 ? tZ : 1;
} else {
// TODO: Fully support the full size dispatch using size for the shape
this->mX = this->mTensorLHS->size();
this->mY = 1;
this->mZ = 1;
}
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
this->mX,
this->mY,
this->mZ);
// TODO: Explore adding a validate function
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
@ -196,22 +168,8 @@ OpMult<tX, tY, tZ>::init()
shader_data::shaders_glsl_opmult_comp_spv +
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
#else
SPDLOG_DEBUG(
"Kompute OpMult Running debug loading shaders directly from spirv file");
// TODO: Move to utility function
std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
std::ifstream fileStream(shaderFilePath,
std::ios::binary | std::ios::in | std::ios::ate);
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderDataRaw = new char[shaderFileSize];
fileStream.read(shaderDataRaw, shaderFileSize);
fileStream.close();
std::vector<char> shaderFileData(shaderDataRaw,
shaderDataRaw + shaderFileSize);
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
#endif
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");