Created OpAlgoBase file to provide abstraction to files that implement algorithm classes
This commit is contained in:
parent
68e46abc7a
commit
a2efc441db
7 changed files with 429 additions and 127 deletions
|
|
@ -15,7 +15,7 @@
|
|||
<td>
|
||||
|
||||
<h1>Vulkan Kompute</h1>
|
||||
<h3>The General Purpose Vulkan Compute Framework</h3>
|
||||
<h3>The General Purpose Vulkan Compute Framework. Blazing fast, lightweight, easy to set up and optimized for advanced data processing usecases.</h3>
|
||||
|
||||
</td>
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "kompute/Manager.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpCreateTensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
|
|
|||
|
|
@ -281,15 +281,18 @@ class Tensor
|
|||
void mapDataIntoHostMemory();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Buffer> mBuffer;
|
||||
bool mFreeBuffer;
|
||||
std::shared_ptr<vk::DeviceMemory> mMemory;
|
||||
bool mFreeMemory;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<uint32_t> mData;
|
||||
|
||||
TensorTypes mTensorType = TensorTypes::eDevice;
|
||||
|
|
@ -297,8 +300,7 @@ class Tensor
|
|||
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
|
||||
bool mIsInit = false;
|
||||
|
||||
// Creates the vulkan buffer
|
||||
void createBuffer();
|
||||
void createBuffer(); // Creates the vulkan buffer
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getBufferUsageFlags();
|
||||
|
|
@ -400,19 +402,19 @@ class OpBase
|
|||
virtual void postSubmit() = 0;
|
||||
|
||||
protected:
|
||||
// OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>>
|
||||
mTensors; ///< Tensors referenced by operation that can be managed
|
||||
///< optionally by operation
|
||||
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
|
||||
///< tensors are freed (if they are managed)
|
||||
|
||||
// NEVER OWNED RESOURCES
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice>
|
||||
mPhysicalDevice; ///< Vulkan Physical Device
|
||||
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
|
||||
std::shared_ptr<vk::CommandBuffer>
|
||||
mCommandBuffer; ///< Vulkan Command Buffer
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>>
|
||||
mTensors; ///< Tensors referenced by operation that can be managed
|
||||
///< optionally by operation
|
||||
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
|
||||
///< tensors are freed (if they are managed)
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -517,10 +519,13 @@ class Sequence
|
|||
}
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
uint32_t mQueueIndex = -1;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
|
||||
bool mFreeCommandPool = false;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
|
||||
|
|
@ -618,6 +623,7 @@ class Manager
|
|||
}
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -627,7 +633,7 @@ class Manager
|
|||
uint32_t mComputeQueueFamilyIndex = -1;
|
||||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
|
||||
// Always owned resources
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
|
||||
|
|
@ -691,16 +697,15 @@ class Algorithm
|
|||
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
|
||||
|
||||
private:
|
||||
// Never Owned Resources
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
|
||||
// Optionally owned resources
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
bool mFreeDescriptorSetLayout = false;
|
||||
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
|
||||
bool mFreeDescriptorPool = false;
|
||||
|
||||
// TODO: Explore design for multiple descriptor sets
|
||||
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
|
||||
bool mFreeDescriptorSet = false;
|
||||
|
|
@ -726,6 +731,195 @@ class Algorithm
|
|||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpAlgoBase : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoBase();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpAlgoBase();
|
||||
|
||||
/**
|
||||
* The init function is responsible for the initialisation of the algorithm
|
||||
* component based on the parameters specified, and allows for extensibility
|
||||
* on the options provided. Further dependent classes can perform more
|
||||
* specific checks such as ensuring tensors provided are initialised, etc.
|
||||
*/
|
||||
virtual void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging bufffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postSubmit() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
uint32_t mX;
|
||||
uint32_t mY;
|
||||
uint32_t mZ;
|
||||
|
||||
std::string mOptSpirvBinPath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
|
||||
|
||||
virtual std::vector<char> fetchSpirvBinaryData();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implemenation for template class
|
||||
#ifndef OPALGOBASE_IMPL
|
||||
#define OPALGOBASE_IMPL
|
||||
|
||||
namespace kp {
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
} else {
|
||||
// TODO: If tensor empty vector exception would be thrown
|
||||
// TODO: Fully support the full size dispatch using size for the shape
|
||||
this->mX = tensors[0]->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
|
||||
{
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpAlgoBase Running shaders directly from spirv file");
|
||||
|
||||
std::ifstream fileStream(this->mOptSpirvBinPath,
|
||||
std::ios::binary | std::ios::in | std::ios::ate);
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
return std::vector<char>(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
|
||||
|
||||
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPALGOBASE_IMPL
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#if RELEASE
|
||||
|
||||
#endif
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that performs multiplication on two tensors and outpus on third
|
||||
* tensor. The template parameters specify the processing GPU layout number of
|
||||
|
|
@ -733,7 +927,7 @@ namespace kp {
|
|||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpBase
|
||||
class OpMult : public OpAlgoBase<tX, tY, tZ>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -755,8 +949,7 @@ class OpMult : public OpBase
|
|||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = false);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -790,21 +983,13 @@ class OpMult : public OpBase
|
|||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Always owned resources
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging;
|
||||
|
||||
// Optionally owned resources
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// Never owned resources
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorLHS;
|
||||
std::shared_ptr<Tensor> mTensorRHS;
|
||||
std::shared_ptr<Tensor> mTensorOutput;
|
||||
|
||||
uint32_t mX;
|
||||
uint32_t mY;
|
||||
uint32_t mZ;
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -825,13 +1010,10 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
|||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -857,25 +1039,6 @@ OpMult<tX, tY, tZ>::init()
|
|||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
} else {
|
||||
// TODO: Fully support the full size dispatch using size for the shape
|
||||
this->mX = this->mTensorLHS->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
// TODO: Explore adding a validate function
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
|
|
@ -909,25 +1072,12 @@ OpMult<tX, tY, tZ>::init()
|
|||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
#else
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute OpMult Running debug loading shaders directly from spirv file");
|
||||
|
||||
// TODO: Move to utility function
|
||||
std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
|
||||
std::ifstream fileStream(shaderFilePath,
|
||||
std::ios::binary | std::ios::in | std::ios::ate);
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
std::vector<char> shaderFileData(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
|
||||
#endif
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
SPDLOG_DEBUG("Kompute vector size {}", shaderFileData.size());
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
|
@ -1019,8 +1169,7 @@ class OpCreateTensor : public OpBase
|
|||
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = true);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
|
|
|
|||
|
|
@ -14,9 +14,8 @@ OpCreateTensor::OpCreateTensor(
|
|||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, true)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params");
|
||||
}
|
||||
|
|
|
|||
196
src/include/kompute/operations/OpAlgoBase.hpp
Normal file
196
src/include/kompute/operations/OpAlgoBase.hpp
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpAlgoBase : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpAlgoBase();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpAlgoBase();
|
||||
|
||||
/**
|
||||
* The init function is responsible for the initialisation of the algorithm
|
||||
* component based on the parameters specified, and allows for extensibility
|
||||
* on the options provided. Further dependent classes can perform more
|
||||
* specific checks such as ensuring tensors provided are initialised, etc.
|
||||
*/
|
||||
virtual void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging bufffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
virtual void record() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postSubmit() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
uint32_t mX;
|
||||
uint32_t mY;
|
||||
uint32_t mZ;
|
||||
|
||||
std::string mOptSpirvBinPath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
|
||||
|
||||
virtual std::vector<char> fetchSpirvBinaryData();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implemenation for template class
|
||||
#ifndef OPALGOBASE_IMPL
|
||||
#define OPALGOBASE_IMPL
|
||||
|
||||
namespace kp {
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
} else {
|
||||
// TODO: If tensor empty vector exception would be thrown
|
||||
// TODO: Fully support the full size dispatch using size for the shape
|
||||
this->mX = tensors[0]->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
|
||||
{
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpAlgoBase Running shaders directly from spirv file");
|
||||
|
||||
std::ifstream fileStream(this->mOptSpirvBinPath,
|
||||
std::ios::binary | std::ios::in | std::ios::ate);
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
return std::vector<char>(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
|
||||
|
||||
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPALGOBASE_IMPL
|
||||
|
||||
|
|
@ -31,8 +31,7 @@ class OpCreateTensor : public OpBase
|
|||
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = true);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
|
|
|
|||
|
|
@ -4,12 +4,14 @@
|
|||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#if RELEASE
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
#endif
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -20,7 +22,7 @@ namespace kp {
|
|||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpBase
|
||||
class OpMult : public OpAlgoBase<tX, tY, tZ>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -42,8 +44,7 @@ class OpMult : public OpBase
|
|||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = false);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -82,16 +83,8 @@ class OpMult : public OpBase
|
|||
std::shared_ptr<Tensor> mTensorRHS;
|
||||
std::shared_ptr<Tensor> mTensorOutput;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
bool mFreeAlgorithm = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging;
|
||||
|
||||
uint32_t mX;
|
||||
uint32_t mY;
|
||||
uint32_t mZ;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -112,13 +105,10 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
|||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
|
|
@ -144,24 +134,6 @@ OpMult<tX, tY, tZ>::init()
|
|||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
} else {
|
||||
// TODO: Fully support the full size dispatch using size for the shape
|
||||
this->mX = this->mTensorLHS->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
// TODO: Explore adding a validate function
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
|
|
@ -196,22 +168,8 @@ OpMult<tX, tY, tZ>::init()
|
|||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
#else
|
||||
SPDLOG_DEBUG(
|
||||
"Kompute OpMult Running debug loading shaders directly from spirv file");
|
||||
|
||||
// TODO: Move to utility function
|
||||
std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
|
||||
std::ifstream fileStream(shaderFilePath,
|
||||
std::ios::binary | std::ios::in | std::ios::ate);
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
std::vector<char> shaderFileData(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
|
||||
#endif
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue