Removed workgroup templates on opalgobase classes

This commit is contained in:
Alejandro Saucedo 2020-11-01 16:28:48 +00:00
parent 6afe6463c2
commit 3ad5e4d3e7
5 changed files with 322 additions and 344 deletions

162
src/OpAlgoBase.cpp Normal file
View file

@ -0,0 +1,162 @@
#pragma once
#include "kompute/operations/OpAlgoBase.hpp"
namespace kp {
/**
 * Base constructor. It only emits a debug trace; no member is set up here.
 */
OpAlgoBase::OpAlgoBase()
{
    SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
}
/**
 * Main constructor: forwards the Vulkan handles and tensors to OpBase,
 * resolves the dispatch size and creates the Algorithm component.
 *
 * @param physicalDevice Vulkan physical device, forwarded to OpBase
 * @param device Vulkan logical device, forwarded to OpBase and Algorithm
 * @param commandBuffer Vulkan command buffer, forwarded to OpBase and Algorithm
 * @param tensors Tensors that are to be used in this operation
 * @param komputeWorkgroup Requested dispatch layout. If x is 0 the layout
 * falls back to {tensors[0]->size(), 1, 1}; otherwise x is used as given and
 * y / z default to 1 when they are 0.
 * @throws std::runtime_error if no explicit x is given and tensors is empty,
 * as the dispatch size cannot be inferred in that case
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       KomputeWorkgroup komputeWorkgroup)
  // NOTE(review): the trailing `false` is presumably OpBase's freeTensors
  // flag - confirm against the OpBase constructor.
  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
    SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size());

    // The dispatch size is set up based on either the explicitly provided
    // workgroup parameter or, by default, the size of the first tensor
    if (komputeWorkgroup.x > 0) {
        // If at least the x value is provided we use mainly the parameters
        // provided; the x component must be carried over as given, since
        // this layout is what record() passes to recordDispatch
        this->mKomputeWorkgroup = {
            komputeWorkgroup.x,
            komputeWorkgroup.y > 0 ? komputeWorkgroup.y : 1,
            komputeWorkgroup.z > 0 ? komputeWorkgroup.z : 1
        };
    } else {
        // Without an explicit layout the first tensor determines the size,
        // so indexing an empty vector must be rejected explicitly
        if (tensors.empty()) {
            throw std::runtime_error(
              "Kompute OpAlgoBase constructed without tensors and without an explicit workgroup");
        }
        this->mKomputeWorkgroup = {tensors[0]->size(), 1, 1};
    }

    SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
                this->mKomputeWorkgroup.x,
                this->mKomputeWorkgroup.y,
                this->mKomputeWorkgroup.z);

    this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
/**
 * Constructor that additionally records the path of a shader file, which
 * fetchSpirvBinaryData later reads. All other set up is delegated to the
 * main constructor.
 *
 * @param physicalDevice Vulkan physical device, forwarded to the main constructor
 * @param device Vulkan logical device, forwarded to the main constructor
 * @param commandBuffer Vulkan command buffer, forwarded to the main constructor
 * @param tensors Tensors that are to be used in this operation
 * @param shaderFilePath Path of the shader file to load
 * @param komputeWorkgroup Dispatch layout, forwarded to the main constructor
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       std::string shaderFilePath,
                       KomputeWorkgroup komputeWorkgroup)
  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
{
    SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath);

    mShaderFilePath = shaderFilePath;
}
/**
 * Constructor that additionally stores raw shader data, which
 * fetchSpirvBinaryData returns when no shader file path is set. All other
 * set up is delegated to the main constructor.
 *
 * @param physicalDevice Vulkan physical device, forwarded to the main constructor
 * @param device Vulkan logical device, forwarded to the main constructor
 * @param commandBuffer Vulkan command buffer, forwarded to the main constructor
 * @param tensors Tensors that are to be used in this operation
 * @param shaderDataRaw Shader data; a copy is kept by the operation
 * @param komputeWorkgroup Dispatch layout, forwarded to the main constructor
 */
OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::Device> device,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       const std::vector<char>& shaderDataRaw,
                       KomputeWorkgroup komputeWorkgroup)
  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
{
    // The message names this (raw data) constructor so that it can be told
    // apart from the shaderFilePath overload when reading logs
    SPDLOG_DEBUG("Kompute OpAlgoBase shaderDataRaw constructor with shader raw data length: {}", shaderDataRaw.size());

    this->mShaderDataRaw = shaderDataRaw;
}
/**
 * Destructor. It only emits a debug trace; members are released by their
 * own destructors.
 */
OpAlgoBase::~OpAlgoBase()
{
    SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
}
/**
 * Validates the tensors, fetches the shader data and initialises the
 * algorithm component with both.
 *
 * @throws std::runtime_error if there are no tensors or if any tensor
 * reports that it has not been initialised
 */
void
OpAlgoBase::init()
{
    SPDLOG_DEBUG("Kompute OpAlgoBase init called");

    if (this->mTensors.empty()) {
        throw std::runtime_error(
          "Kompute OpAlgoBase called with less than 1 tensor");
    }

    // Iterate by const reference: copying each shared_ptr would only add
    // reference count traffic without any benefit
    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        if (!tensor->isInit()) {
            throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised.");
        }
    }

    SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data");

    std::vector<char> shaderFileData = this->fetchSpirvBinaryData();

    SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");

    this->mAlgorithm->init(shaderFileData, this->mTensors);
}
/**
 * Records a host-write to shader-read buffer memory barrier for every
 * tensor and then records the dispatch with the stored workgroup layout.
 */
void
OpAlgoBase::record()
{
    SPDLOG_DEBUG("Kompute OpAlgoBase record called");

    // Barrier to ensure the data is finished writing to buffer memory.
    // Tensors are visited by const reference to avoid copying a shared_ptr
    // on every iteration.
    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        tensor->recordBufferMemoryBarrier(
          this->mCommandBuffer,
          vk::AccessFlagBits::eHostWrite,
          vk::AccessFlagBits::eShaderRead,
          vk::PipelineStageFlagBits::eHost,
          vk::PipelineStageFlagBits::eComputeShader);
    }

    this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup.x,
                                     this->mKomputeWorkgroup.y,
                                     this->mKomputeWorkgroup.z);
}
/**
 * Pre-evaluation hook. The base implementation only emits a debug trace.
 */
void
OpAlgoBase::preEval()
{
    SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
}
/**
 * Post-evaluation hook. The base implementation only emits a debug trace.
 */
void
OpAlgoBase::postEval()
{
    // The trace names the current function; the previous text still
    // referred to the pre-rename name (postSubmit)
    SPDLOG_DEBUG("Kompute OpAlgoBase postEval called");
}
/**
 * Returns the shader data that is handed to the algorithm component.
 *
 * If mShaderFilePath is non-empty the file is read in binary mode and its
 * full contents are returned. Otherwise, if mShaderDataRaw is non-empty, a
 * copy of it is returned.
 *
 * @return The shader bytes
 * @throws std::runtime_error if the file cannot be opened, its size cannot
 * be determined or its contents cannot be fully read, or if neither a file
 * path nor raw data has been provided
 */
std::vector<char> OpAlgoBase::fetchSpirvBinaryData()
{
    SPDLOG_WARN(
      "Kompute OpAlgoBase Running shaders directly from spirv file");

    if (this->mShaderFilePath.size()) {
        // Opened positioned at the end so that tellg() yields the file size
        std::ifstream fileStream(this->mShaderFilePath,
                                 std::ios::binary | std::ios::in | std::ios::ate);

        if (!fileStream.good()) {
            throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
        }

        // tellg() reports -1 on failure; this must not be turned into a
        // (huge) unsigned buffer size
        const std::streamoff shaderFileSize = fileStream.tellg();
        if (shaderFileSize < 0) {
            throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
        }
        fileStream.seekg(0, std::ios::beg);

        // Read straight into the vector that is returned, so there is no
        // manually allocated buffer that could leak
        std::vector<char> shaderData(static_cast<size_t>(shaderFileSize));
        fileStream.read(shaderData.data(),
                        static_cast<std::streamsize>(shaderFileSize));
        if (!fileStream.good()) {
            throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
        }
        fileStream.close();

        SPDLOG_WARN(
          "Kompute OpAlgoBase fetched {} bytes", shaderData.size());

        return shaderData;
    }
    else if (this->mShaderDataRaw.size()) {
        return this->mShaderDataRaw;
    }
    else {
        throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided");
    }
}
}

129
src/OpAlgoLhsRhsOut.cpp Normal file
View file

@ -0,0 +1,129 @@
#pragma once
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
namespace kp {
/**
 * Base constructor. It only emits a debug trace; no member is set up here.
 */
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
}
/**
 * Main constructor. All arguments are forwarded unchanged to the OpAlgoBase
 * constructor; the output transfer via a staging tensor is set up later in
 * init() and record().
 *
 * @param physicalDevice Vulkan physical device, forwarded to OpAlgoBase
 * @param device Vulkan logical device, forwarded to OpAlgoBase
 * @param commandBuffer Vulkan command buffer, forwarded to OpAlgoBase
 * @param tensors Tensors that are to be used in this operation
 * @param komputeWorkgroup Dispatch layout, forwarded to OpAlgoBase
 */
OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                                 std::shared_ptr<vk::Device> device,
                                 std::shared_ptr<vk::CommandBuffer> commandBuffer,
                                 std::vector<std::shared_ptr<Tensor>> tensors,
                                 KomputeWorkgroup komputeWorkgroup)
  // This dependent class handles the transfer of the output data via a
  // staging buffer in a granular way, so nothing beyond the base set up is
  // requested here.
  : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup)
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
}
/**
 * Destructor. It only emits a debug trace; members are released by their
 * own destructors.
 */
OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
}
/**
 * Validates the tensors, creates the staging tensor for the output and
 * initialises the algorithm component.
 *
 * The first three tensors are taken as LHS, RHS and output respectively;
 * additional tensors only trigger a warning.
 *
 * @throws std::runtime_error if fewer than 3 tensors were provided, if any
 * of the three tensors is not initialised, or if their sizes differ
 */
void
OpAlgoLhsRhsOut::init()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");

    if (this->mTensors.size() < 3) {
        // The message states the actual requirement checked above (3 tensors)
        throw std::runtime_error(
          "Kompute OpAlgoLhsRhsOut called with less than 3 tensors");
    } else if (this->mTensors.size() > 3) {
        SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
    }

    this->mTensorLHS = this->mTensors[0];
    this->mTensorRHS = this->mTensors[1];
    this->mTensorOutput = this->mTensors[2];

    if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
          this->mTensorOutput->isInit())) {
        throw std::runtime_error(
          "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
          std::to_string(this->mTensorLHS->isInit()) +
          " RHS: " + std::to_string(this->mTensorRHS->isInit()) +
          " Output: " + std::to_string(this->mTensorOutput->isInit()));
    }

    if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
          this->mTensorRHS->size() == this->mTensorOutput->size())) {
        throw std::runtime_error(
          "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
          std::to_string(this->mTensorLHS->size()) +
          " RHS: " + std::to_string(this->mTensorRHS->size()) +
          " Output: " + std::to_string(this->mTensorOutput->size()));
    }

    // Staging tensor built from the output tensor's data; record() copies
    // the output into it and postEval() reads it back
    this->mTensorOutputStaging = std::make_shared<Tensor>(
      this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);

    this->mTensorOutputStaging->init(
      this->mPhysicalDevice, this->mDevice);

    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");

    std::vector<char> shaderFileData = this->fetchSpirvBinaryData();

    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");

    this->mAlgorithm->init(shaderFileData, this->mTensors);
}
void
OpAlgoLhsRhsOut::record()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
// Barrier to ensure the data is finished writing to buffer memory
this->mTensorLHS->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mTensorRHS->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mAlgorithm->recordDispatch(
this->mKomputeWorkgroup.x,
this->mKomputeWorkgroup.y,
this->mKomputeWorkgroup.z);
// Barrier to ensure the shader code is executed before buffer read
this->mTensorOutput->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead,
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer);
this->mTensorOutputStaging->recordCopyFrom(
this->mCommandBuffer,
this->mTensorOutput,
true);
}
/**
 * Post-evaluation hook: maps the staging tensor's data from host memory and
 * stores that data in the output tensor.
 */
void
OpAlgoLhsRhsOut::postEval()
{
    // The trace names the current function; the previous text still
    // referred to the pre-rename name (postSubmit)
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postEval called");

    this->mTensorOutputStaging->mapDataFromHostMemory();

    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
}
}

View file

@ -17,20 +17,17 @@ namespace kp {
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*
* All of these tensors are expected to be initialised; this is checked in the init function, which throws a std exception otherwise.
*
* See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters.
*
* The template parameters specify the processing GPU layout number of
* iterations for each x, y, z parameter. More specifically, this will be the
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
* which are then passed as inputs.
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpAlgoBase : public OpBase
{
public:
struct KomputeWorkgroup {
uint32_t x;
uint32_t y;
uint32_t z;
};
/**
* Base constructor, should not be used unless explicitly intended.
*/
@ -46,11 +43,13 @@ class OpAlgoBase : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors);
std::vector<std::shared_ptr<Tensor>>& tensors,
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
/**
* Constructor that enables a file to be passed to the operation with
@ -61,13 +60,15 @@ class OpAlgoBase : public OpBase
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
std::string shaderFilePath);
std::string shaderFilePath,
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
/**
* Constructor that enables raw shader data to be passed to the main operation
@ -78,12 +79,14 @@ class OpAlgoBase : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<char>& shaderDataRaw);
const std::vector<char>& shaderDataRaw,
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
/**
* Default destructor, which is in charge of destroying the algorithm
@ -131,9 +134,7 @@ class OpAlgoBase : public OpBase
// -------------- ALWAYS OWNED RESOURCES
uint32_t mX;
uint32_t mY;
uint32_t mZ;
KomputeWorkgroup mKomputeWorkgroup;
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
std::vector<char> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
@ -143,174 +144,3 @@ class OpAlgoBase : public OpBase
} // End namespace kp
// Including implementation for template class
#ifndef OPALGOBASE_IMPL
#define OPALGOBASE_IMPL
namespace kp {
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors)
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size());
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (tX > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mX = tX;
this->mY = tY > 0 ? tY : 1;
this->mZ = tZ > 0 ? tZ : 1;
} else {
this->mX = tensors[0]->size();
this->mY = 1;
this->mZ = 1;
}
SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
this->mX,
this->mY,
this->mZ);
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
std::string shaderFilePath)
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath);
this->mShaderFilePath = shaderFilePath;
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<char>& shaderDataRaw)
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size());
this->mShaderDataRaw = shaderDataRaw;
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
{
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::init()
{
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
if (this->mTensors.size() < 1) {
throw std::runtime_error(
"Kompute OpAlgoBase called with less than 1 tensor");
}
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
if(!tensor->isInit()) {
throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised.");
}
}
SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data");
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::record()
{
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
// Barrier to ensure the data is finished writing to buffer memory
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
tensor->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
}
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::preEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
{
SPDLOG_WARN(
"Kompute OpAlgoBase Running shaders directly from spirv file");
if (this->mShaderFilePath.size()) {
std::ifstream fileStream(this->mShaderFilePath,
std::ios::binary | std::ios::in | std::ios::ate);
if (!fileStream.good()) {
throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
}
size_t shaderFileSize = fileStream.tellg();
fileStream.seekg(0, std::ios::beg);
char* shaderDataRaw = new char[shaderFileSize];
fileStream.read(shaderDataRaw, shaderFileSize);
fileStream.close();
SPDLOG_WARN(
"Kompute OpAlgoBase fetched {} bytes", shaderFileSize);
return std::vector<char>(shaderDataRaw,
shaderDataRaw + shaderFileSize);
}
else if (this->mShaderDataRaw.size()) {
return this->mShaderDataRaw;
}
else {
throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided");
}
}
}
#endif // #ifndef OPALGOBASE_IMPL

View file

@ -15,12 +15,8 @@ namespace kp {
* Operation base class to simplify the creation of operations that require
* right hand and left hand side datapoints together with a single output.
* The expected data passed is two input tensors and one output tensor.
* The template parameters specify the processing GPU layout number of
* iterations for each x, y, z parameter. More specifically, this will be the
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
class OpAlgoLhsRhsOut : public OpAlgoBase
{
public:
/**
@ -38,11 +34,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param freeTensors Whether operation manages the memory of the Tensors
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
std::vector<std::shared_ptr<Tensor>> tensors,
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
/**
* Default destructor, which is in charge of destroying the algorithm
@ -73,7 +71,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postSubmit() override;
virtual void postEval() override;
protected:
// -------------- NEVER OWNED RESOURCES
@ -87,136 +85,3 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
} // End namespace kp
// Including implementation for template class
#ifndef OPALGOLHSRHSOUT_CPP
#define OPALGOLHSRHSOUT_CPP
namespace kp {
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
// The inheritance is initialised with the copyOutputData to false given that
// this depencendant class handles the transfer of data via staging buffers in
// a granular way.
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::~OpAlgoLhsRhsOut()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoLhsRhsOut<tX, tY, tZ>::init()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
if (this->mTensors.size() < 3) {
throw std::runtime_error(
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
} else if (this->mTensors.size() > 3) {
SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
}
this->mTensorLHS = this->mTensors[0];
this->mTensorRHS = this->mTensors[1];
this->mTensorOutput = this->mTensors[2];
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
this->mTensorOutput->isInit())) {
throw std::runtime_error(
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
std::to_string(this->mTensorLHS->isInit()) +
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
" Output: " + std::to_string(this->mTensorOutput->isInit()));
}
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
this->mTensorRHS->size() == this->mTensorOutput->size())) {
throw std::runtime_error(
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
std::to_string(this->mTensorLHS->size()) +
" RHS: " + std::to_string(this->mTensorRHS->size()) +
" Output: " + std::to_string(this->mTensorOutput->size()));
}
this->mTensorOutputStaging = std::make_shared<Tensor>(
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
this->mTensorOutputStaging->init(
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoLhsRhsOut<tX, tY, tZ>::record()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
// Barrier to ensure the data is finished writing to buffer memory
this->mTensorLHS->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mTensorRHS->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eHostWrite,
vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eHost,
vk::PipelineStageFlagBits::eComputeShader);
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
// Barrier to ensure the shader code is executed before buffer read
this->mTensorOutput->recordBufferMemoryBarrier(
this->mCommandBuffer,
vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead,
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer);
this->mTensorOutputStaging->recordCopyFrom(
this->mCommandBuffer,
this->mTensorOutput,
true);
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoLhsRhsOut<tX, tY, tZ>::postSubmit()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
this->mTensorOutputStaging->mapDataFromHostMemory();
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
}
}
#endif // #ifndef OPALGOLHSRHSOUT_CPP

View file

@ -17,12 +17,9 @@ namespace kp {
/**
* Operation that performs multiplication on two tensors and outputs on a third
* tensor. The template parameters specify the processing GPU layout number of
* iterations for each x, y, z parameter. More specifically, this will be the
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
* tensor.
*/
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpMult : public OpAlgoBase<tX, tY, tZ>
class OpMult : public OpAlgoBase
{
public:
/**
@ -41,13 +38,14 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param freeTensors Whether operation manages the memory of the Tensors
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors)
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors, "")
std::vector<std::shared_ptr<Tensor>> tensors,
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup())
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
{
SPDLOG_DEBUG("Kompute OpMult constructor with params");
@ -58,14 +56,8 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
#if RELEASE
/**
* If release it will be using the static version of the shader which is
* loaded using this file directly.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param freeTensors Whether operation manages the memory of the Tensors
* If RELEASE=1 it will be using the static version of the shader which is
* loaded using this file directly. Otherwise it should not override the function.
*/
std::vector<char> fetchSpirvBinaryData() override
{