Updated single include with non-templated opalgobase classes
This commit is contained in:
parent
552a6c051f
commit
b0d394a50b
1 changed files with 31 additions and 343 deletions
|
|
@ -1620,20 +1620,17 @@ namespace kp {
|
|||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*
|
||||
* All of these tensors are expected to be initlaised and this is checked with throw std exception in the init function.
|
||||
*
|
||||
* See OpLhsRhsOut for an example implementation on a more specific granularity on tensor parameters.
|
||||
*
|
||||
* The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpAlgoBase : public OpBase
|
||||
{
|
||||
public:
|
||||
struct KomputeWorkgroup {
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint32_t z;
|
||||
};
|
||||
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
|
|
@ -1649,11 +1646,13 @@ class OpAlgoBase : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
|
||||
|
||||
/**
|
||||
* Constructor that enables a file to be passed to the operation with
|
||||
|
|
@ -1664,13 +1663,15 @@ class OpAlgoBase : public OpBase
|
|||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath);
|
||||
std::string shaderFilePath,
|
||||
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
|
||||
|
||||
/**
|
||||
* Constructor that enables raw shader data to be passed to the main operation
|
||||
|
|
@ -1681,12 +1682,14 @@ class OpAlgoBase : public OpBase
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<char>& shaderDataRaw);
|
||||
const std::vector<char>& shaderDataRaw,
|
||||
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -1733,9 +1736,7 @@ class OpAlgoBase : public OpBase
|
|||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
|
||||
uint32_t mX;
|
||||
uint32_t mY;
|
||||
uint32_t mZ;
|
||||
KomputeWorkgroup mKomputeWorkgroup;
|
||||
|
||||
std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing
|
||||
std::vector<char> mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content
|
||||
|
|
@ -1745,177 +1746,6 @@ class OpAlgoBase : public OpBase
|
|||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implementation for template class
|
||||
#ifndef OPALGOBASE_IMPL
|
||||
#define OPALGOBASE_IMPL
|
||||
|
||||
namespace kp {
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpBase(physicalDevice, device, commandBuffer, tensors, false)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size());
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (tX > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mX = tX;
|
||||
this->mY = tY > 0 ? tY : 1;
|
||||
this->mZ = tZ > 0 ? tZ : 1;
|
||||
} else {
|
||||
this->mX = tensors[0]->size();
|
||||
this->mY = 1;
|
||||
this->mZ = 1;
|
||||
}
|
||||
SPDLOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mX,
|
||||
this->mY,
|
||||
this->mZ);
|
||||
|
||||
this->mAlgorithm = std::make_shared<Algorithm>(device, commandBuffer);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
std::string shaderFilePath)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", shaderFilePath);
|
||||
|
||||
this->mShaderFilePath = shaderFilePath;
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<char>& shaderDataRaw)
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw data length: {}", shaderDataRaw.size());
|
||||
|
||||
this->mShaderDataRaw = shaderDataRaw;
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoBase<tX, tY, tZ>::~OpAlgoBase()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoBase called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
if(!tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpAlgoBase validation failed; all tensor parameters must be initialised.");
|
||||
}
|
||||
}
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase fetching spirv data");
|
||||
|
||||
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::preEval()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoBase<tX, tY, tZ>::postEval()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
std::vector<char> OpAlgoBase<tX, tY, tZ>::fetchSpirvBinaryData()
|
||||
{
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpAlgoBase Running shaders directly from spirv file");
|
||||
|
||||
if (this->mShaderFilePath.size()) {
|
||||
std::ifstream fileStream(this->mShaderFilePath,
|
||||
std::ios::binary | std::ios::in | std::ios::ate);
|
||||
|
||||
if (!fileStream.good()) {
|
||||
throw std::runtime_error("Error reading file: " + this->mShaderFilePath);
|
||||
}
|
||||
|
||||
size_t shaderFileSize = fileStream.tellg();
|
||||
fileStream.seekg(0, std::ios::beg);
|
||||
char* shaderDataRaw = new char[shaderFileSize];
|
||||
fileStream.read(shaderDataRaw, shaderFileSize);
|
||||
fileStream.close();
|
||||
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpAlgoBase fetched {} bytes", shaderFileSize);
|
||||
|
||||
return std::vector<char>(shaderDataRaw,
|
||||
shaderDataRaw + shaderFileSize);
|
||||
}
|
||||
else if (this->mShaderDataRaw.size()) {
|
||||
return this->mShaderDataRaw;
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither filepath nor data provided");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPALGOBASE_IMPL
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -1924,12 +1754,8 @@ namespace kp {
|
|||
* Operation base class to simplify the creation of operations that require
|
||||
* right hand and left hand side datapoints together with a single output.
|
||||
* The expected data passed is two input tensors and one output tensor.
|
||||
* The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
|
||||
class OpAlgoLhsRhsOut : public OpAlgoBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -1947,11 +1773,13 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
|
|||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup());
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -1982,7 +1810,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
|
|||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
virtual void postSubmit() override;
|
||||
virtual void postEval() override;
|
||||
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
|
@ -1996,138 +1824,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
|
|||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implementation for template class
|
||||
#ifndef OPALGOLHSRHSOUT_CPP
|
||||
#define OPALGOLHSRHSOUT_CPP
|
||||
|
||||
namespace kp {
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
// The inheritance is initialised with the copyOutputData to false given that
|
||||
// this depencendant class handles the transfer of data via staging buffers in
|
||||
// a granular way.
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::~OpAlgoLhsRhsOut()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
SPDLOG_WARN("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
}
|
||||
|
||||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
this->mTensorOutputStaging = std::make_shared<Tensor>(
|
||||
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mTensorOutputStaging->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
this->mCommandBuffer,
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
this->mTensorOutputStaging->recordCopyFrom(
|
||||
this->mCommandBuffer,
|
||||
this->mTensorOutput,
|
||||
true);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
|
||||
|
||||
this->mTensorOutputStaging->mapDataFromHostMemory();
|
||||
|
||||
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPALGOLHSRHSOUT_CPP
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#if RELEASE
|
||||
|
|
@ -2138,12 +1834,9 @@ namespace kp {
|
|||
|
||||
/**
|
||||
* Operation that performs multiplication on two tensors and outpus on third
|
||||
* tensor. The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
* tensor.
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpAlgoBase<tX, tY, tZ>
|
||||
class OpMult : public OpAlgoBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -2162,13 +1855,14 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* @param komputeWorkgroup Optional parameter to specify the layout for processing
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors)
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors, "")
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
KomputeWorkgroup komputeWorkgroup = KomputeWorkgroup())
|
||||
: OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
|
|
@ -2179,14 +1873,8 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
|
||||
#if RELEASE
|
||||
/**
|
||||
* If release it will be using the static version of the shader which is
|
||||
* loaded using this file directly.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
* If RELEASE=1 it will be using the static version of the shader which is
|
||||
* loaded using this file directly. Otherwise it should not override the function.
|
||||
*/
|
||||
std::vector<char> fetchSpirvBinaryData() override
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue