diff --git a/README.md b/README.md
index 26bc2bb36..1d191ecc3 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
Vulkan Kompute
-The General Purpose Vulkan Compute Framework
+The General Purpose Vulkan Compute Framework. Blazing fast, lightweight, easy to set up and optimized for advanced data processing use cases.
|
diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp
index 5a86ac9cd..779bcd29b 100644
--- a/single_include/AggregateHeaders.cpp
+++ b/single_include/AggregateHeaders.cpp
@@ -3,6 +3,7 @@
#include "kompute/Manager.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpBase.hpp"
+#include "kompute/operations/OpAlgoBase.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpCreateTensor.hpp"
#include "kompute/Algorithm.hpp"
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 6b0f21246..cb13b744f 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -281,15 +281,18 @@ class Tensor
void mapDataIntoHostMemory();
private:
+ // -------------- NEVER OWNED RESOURCES
std::shared_ptr mPhysicalDevice;
std::shared_ptr mDevice;
std::shared_ptr mCommandBuffer;
+ // -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr mBuffer;
bool mFreeBuffer;
std::shared_ptr mMemory;
bool mFreeMemory;
+ // -------------- ALWAYS OWNED RESOURCES
std::vector mData;
TensorTypes mTensorType = TensorTypes::eDevice;
@@ -297,8 +300,7 @@ class Tensor
std::array mShape;
bool mIsInit = false;
- // Creates the vulkan buffer
- void createBuffer();
+ void createBuffer(); // Creates the vulkan buffer
// Private util functions
vk::BufferUsageFlags getBufferUsageFlags();
@@ -400,19 +402,19 @@ class OpBase
virtual void postSubmit() = 0;
protected:
- // OPTIONALLY OWNED RESOURCES
- std::vector>
- mTensors; ///< Tensors referenced by operation that can be managed
- ///< optionally by operation
- bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
- ///< tensors are freed (if they are managed)
-
- // NEVER OWNED RESOURCES
+ // -------------- NEVER OWNED RESOURCES
std::shared_ptr
mPhysicalDevice; ///< Vulkan Physical Device
std::shared_ptr mDevice; ///< Vulkan Logical Device
std::shared_ptr
mCommandBuffer; ///< Vulkan Command Buffer
+
+ // -------------- OPTIONALLY OWNED RESOURCES
+ std::vector>
+ mTensors; ///< Tensors referenced by operation that can be managed
+ ///< optionally by operation
+ bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
+ ///< tensors are freed (if they are managed)
};
} // End namespace kp
@@ -517,10 +519,13 @@ class Sequence
}
private:
+ // -------------- NEVER OWNED RESOURCES
std::shared_ptr mPhysicalDevice = nullptr;
std::shared_ptr mDevice = nullptr;
std::shared_ptr mComputeQueue = nullptr;
uint32_t mQueueIndex = -1;
+
+ // -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr mCommandPool = nullptr;
bool mFreeCommandPool = false;
std::shared_ptr mCommandBuffer = nullptr;
@@ -618,6 +623,7 @@ class Manager
}
private:
+ // -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr mInstance = nullptr;
bool mFreeInstance = false;
std::shared_ptr mPhysicalDevice = nullptr;
@@ -627,7 +633,7 @@ class Manager
uint32_t mComputeQueueFamilyIndex = -1;
std::shared_ptr mComputeQueue = nullptr;
- // Always owned resources
+ // -------------- ALWAYS OWNED RESOURCES
std::unordered_map>
mManagedSequences;
@@ -691,16 +697,15 @@ class Algorithm
void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1);
private:
- // Never Owned Resources
+ // -------------- NEVER OWNED RESOURCES
std::shared_ptr mDevice;
std::shared_ptr mCommandBuffer;
- // Optionally owned resources
+ // -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr mDescriptorPool;
bool mFreeDescriptorPool = false;
-
// TODO: Explore design for multiple descriptor sets
std::shared_ptr mDescriptorSet;
bool mFreeDescriptorSet = false;
@@ -726,6 +731,195 @@ class Algorithm
namespace kp {
+/**
+ * Operation that provides a general abstraction that simplifies the use of
+ * algorithm and parameter components which can be used with shaders.
+ * The template parameters tX, tY and tZ give the dispatch size passed to
+ * "recordDispatch(uint32_t x, uint32_t y, uint32_t z)": a positive tX is used
+ * as given, otherwise x is the size of the first tensor and y and z are 1.
+ */
+template
+class OpAlgoBase : public OpBase
+{
+ public:
+ /**
+ * Base constructor (only logs), should not be used unless explicitly intended.
+ */
+ OpAlgoBase();
+
+ /**
+ * Constructor with the bare minimum parameters: forwards them to OpBase,
+ * resolves the dispatch size (mX, mY, mZ) and creates the algorithm
+ * component from the device and the command buffer.
+ *
+ * @param physicalDevice Vulkan physical device used to find device queues
+ * @param device Vulkan logical device for passing to Algorithm
+ * @param commandBuffer Vulkan Command Buffer to record commands into
+ * @param tensors Tensors that are to be used in this operation
+ */
+ OpAlgoBase(std::shared_ptr physicalDevice,
+ std::shared_ptr device,
+ std::shared_ptr commandBuffer,
+ std::vector>& tensors);
+
+ /**
+ * Destructor; it only logs. Neither the algorithm component (held through
+ * a shared pointer) nor the tensors are explicitly destroyed here.
+ */
+ ~OpAlgoBase();
+
+ /**
+ * Initialises the algorithm component with the data returned by
+ * fetchSpirvBinaryData() and with the tensors of this operation. Derived
+ * classes can override it to perform more specific checks, such as
+ * ensuring that the tensors provided are initialised.
+ */
+ virtual void init() override;
+
+ /**
+ * Records the commands that are to be sent to the GPU. At this level that
+ * is only the dispatch of the algorithm component, using the mX, mY and mZ
+ * sizes resolved by the constructor. No memory barriers and no copies to a
+ * staging buffer are recorded here, so an operation that needs them has to
+ * override this function and record them itself, in addition to (or
+ * instead of) the dispatch.
+ */
+ virtual void record() override;
+
+ /**
+ * Executes after the recorded commands are submitted. At this level it
+ * only logs; derived classes can override it, for example to retrieve
+ * the output data.
+ */
+ virtual void postSubmit() override;
+
+ protected:
+ // -------------- NEVER OWNED RESOURCES
+
+ // -------------- OPTIONALLY OWNED RESOURCES
+ std::shared_ptr mAlgorithm;
+ bool mFreeAlgorithm = false;
+
+ // -------------- ALWAYS OWNED RESOURCES
+ uint32_t mX; ///< Dispatch size in x (not set by the base constructor)
+ uint32_t mY; ///< Dispatch size in y (not set by the base constructor)
+ uint32_t mZ; ///< Dispatch size in z (not set by the base constructor)
+
+ std::string mOptSpirvBinPath; ///< Optional path of the file that fetchSpirvBinaryData() reads; derived classes set it before the data is fetched
+ // Reads the file at mOptSpirvBinPath and returns its contents
+ virtual std::vector fetchSpirvBinaryData();
+};
+
+} // End namespace kp
+
+// Including implementation for template class
+#ifndef OPALGOBASE_IMPL
+#define OPALGOBASE_IMPL
+
+namespace kp {
+
+template
+OpAlgoBase::OpAlgoBase()
+{
+    // Only logs: mAlgorithm stays empty and mX/mY/mZ are left unset here
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
+}
+
+template
+OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice,
+                       std::shared_ptr device,
+                       std::shared_ptr commandBuffer,
+                       std::vector>& tensors)
+  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
+
+    // Explicit template parameters win; otherwise X is the first tensor's size
+    if (tX > 0) {
+        this->mX = tX;
+        this->mY = tY > 0 ? tY : 1; // y and z fall back to 1 when not positive
+        this->mZ = tZ > 0 ? tZ : 1;
+    } else {
+        // at() throws std::out_of_range on an empty list (operator[] was UB)
+        // TODO: Fully support the full size dispatch using size for the shape
+        this->mX = tensors.at(0)->size();
+        this->mY = 1;
+        this->mZ = 1;
+    }
+    spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
+                 this->mX, this->mY, this->mZ);
+
+    this->mAlgorithm = std::make_shared(device, commandBuffer);
+}
+
+template
+OpAlgoBase::~OpAlgoBase()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
+}
+
+template
+std::vector OpAlgoBase::fetchSpirvBinaryData()
+{
+    SPDLOG_WARN(
+      "Kompute OpAlgoBase Running shaders directly from spirv file");
+
+    std::ifstream fileStream(this->mOptSpirvBinPath,
+                             std::ios::binary | std::ios::in | std::ios::ate);
+    if (!fileStream) {
+        throw std::ios_base::failure("Kompute OpAlgoBase cannot open " +
+                                     this->mOptSpirvBinPath);
+    }
+
+    // The stream was opened at the end (ios::ate): tellg() is the byte count
+    size_t shaderFileSize = fileStream.tellg();
+    fileStream.seekg(0, std::ios::beg);
+    // The vector owns the buffer, so nothing leaks (was a raw new[])
+    std::vector shaderData(shaderFileSize, '\0');
+    fileStream.read(shaderData.data(), shaderFileSize);
+
+    return shaderData;
+}
+
+template
+void
+OpAlgoBase::init()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase init called");
+    // Base behaviour: load the data from mOptSpirvBinPath and hand it over
+    std::vector shaderFileData = this->fetchSpirvBinaryData();
+
+    this->mAlgorithm->init(shaderFileData, this->mTensors);
+}
+
+template
+void
+OpAlgoBase::record()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase record called");
+    // Only the dispatch is recorded at this level
+    this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
+}
+
+template
+void
+OpAlgoBase::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
+}
+
+}
+
+#endif // #ifndef OPALGOBASE_IMPL
+
+#include
+
+#if RELEASE
+
+#endif
+
+namespace kp {
+
/**
* Operation that performs multiplication on two tensors and outpus on third
* tensor. The template parameters specify the processing GPU layout number of
@@ -733,7 +927,7 @@ namespace kp {
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template
-class OpMult : public OpBase
+class OpMult : public OpAlgoBase
{
public:
/**
@@ -755,8 +949,7 @@ class OpMult : public OpBase
OpMult(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors = false);
+ std::vector>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
@@ -790,21 +983,13 @@ class OpMult : public OpBase
void postSubmit() override;
private:
- // Always owned resources
- std::shared_ptr mTensorOutputStaging;
-
- // Optionally owned resources
- std::shared_ptr mAlgorithm;
- bool mFreeAlgorithm = false;
-
- // Never owned resources
+ // -------------- NEVER OWNED RESOURCES
std::shared_ptr mTensorLHS;
std::shared_ptr mTensorRHS;
std::shared_ptr mTensorOutput;
- uint32_t mX;
- uint32_t mY;
- uint32_t mZ;
+ // -------------- ALWAYS OWNED RESOURCES
+ std::shared_ptr mTensorOutputStaging;
};
} // End namespace kp
@@ -825,13 +1010,10 @@ template
OpMult::OpMult(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors)
- : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
+ std::vector>& tensors)
+ : OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpMult constructor with params");
-
- this->mAlgorithm = std::make_shared(device, commandBuffer);
}
template
@@ -857,25 +1039,6 @@ OpMult::init()
this->mTensorRHS = this->mTensors[1];
this->mTensorOutput = this->mTensors[2];
- // The dispatch size is set up based on either explicitly provided template
- // parameters or by default it would take the shape and size of the tensors
- if (tX > 0) {
- // If at least the x value is provided we use mainly the parameters
- // provided
- this->mX = tX;
- this->mY = tY > 0 ? tY : 1;
- this->mZ = tZ > 0 ? tZ : 1;
- } else {
- // TODO: Fully support the full size dispatch using size for the shape
- this->mX = this->mTensorLHS->size();
- this->mY = 1;
- this->mZ = 1;
- }
- spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
- this->mX,
- this->mY,
- this->mZ);
-
// TODO: Explore adding a validate function
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
this->mTensorOutput->isInit())) {
@@ -909,25 +1072,12 @@ OpMult::init()
shader_data::shaders_glsl_opmult_comp_spv +
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
#else
- SPDLOG_DEBUG(
- "Kompute OpMult Running debug loading shaders directly from spirv file");
-
- // TODO: Move to utility function
- std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
- std::ifstream fileStream(shaderFilePath,
- std::ios::binary | std::ios::in | std::ios::ate);
-
- size_t shaderFileSize = fileStream.tellg();
- fileStream.seekg(0, std::ios::beg);
- char* shaderDataRaw = new char[shaderFileSize];
- fileStream.read(shaderDataRaw, shaderFileSize);
- fileStream.close();
-
- std::vector shaderFileData(shaderDataRaw,
- shaderDataRaw + shaderFileSize);
+ this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
+ std::vector shaderFileData = this->fetchSpirvBinaryData();
#endif
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
+ SPDLOG_DEBUG("Kompute vector size {}", shaderFileData.size());
this->mAlgorithm->init(shaderFileData, this->mTensors);
}
@@ -1019,8 +1169,7 @@ class OpCreateTensor : public OpBase
OpCreateTensor(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors = true);
+ std::vector>& tensors);
/**
* Default destructor which in this case expects the parent class to free
diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp
index b82962255..ee0c2504f 100644
--- a/src/OpCreateTensor.cpp
+++ b/src/OpCreateTensor.cpp
@@ -14,9 +14,8 @@ OpCreateTensor::OpCreateTensor(
std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors)
- : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
+ std::vector>& tensors)
+ : OpBase(physicalDevice, device, commandBuffer, tensors, true)
{
SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params");
}
diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp
new file mode 100644
index 000000000..b224dea14
--- /dev/null
+++ b/src/include/kompute/operations/OpAlgoBase.hpp
@@ -0,0 +1,196 @@
+#pragma once
+
+#include
+
+#include "kompute/Core.hpp"
+
+#include "kompute/shaders/shaderopmult.hpp"
+
+#include "kompute/Algorithm.hpp"
+#include "kompute/Tensor.hpp"
+
+#include "kompute/operations/OpBase.hpp"
+
+namespace kp {
+
+/**
+ * Operation that provides a general abstraction that simplifies the use of
+ * algorithm and parameter components which can be used with shaders.
+ * The template parameters tX, tY and tZ give the dispatch size passed to
+ * "recordDispatch(uint32_t x, uint32_t y, uint32_t z)": a positive tX is used
+ * as given, otherwise x is the size of the first tensor and y and z are 1.
+ */
+template
+class OpAlgoBase : public OpBase
+{
+ public:
+ /**
+ * Base constructor (only logs), should not be used unless explicitly intended.
+ */
+ OpAlgoBase();
+
+ /**
+ * Constructor with the bare minimum parameters: forwards them to OpBase,
+ * resolves the dispatch size (mX, mY, mZ) and creates the algorithm
+ * component from the device and the command buffer.
+ *
+ * @param physicalDevice Vulkan physical device used to find device queues
+ * @param device Vulkan logical device for passing to Algorithm
+ * @param commandBuffer Vulkan Command Buffer to record commands into
+ * @param tensors Tensors that are to be used in this operation
+ */
+ OpAlgoBase(std::shared_ptr physicalDevice,
+ std::shared_ptr device,
+ std::shared_ptr commandBuffer,
+ std::vector>& tensors);
+
+ /**
+ * Destructor; it only logs. Neither the algorithm component (held through
+ * a shared pointer) nor the tensors are explicitly destroyed here.
+ */
+ ~OpAlgoBase();
+
+ /**
+ * Initialises the algorithm component with the data returned by
+ * fetchSpirvBinaryData() and with the tensors of this operation. Derived
+ * classes can override it to perform more specific checks, such as
+ * ensuring that the tensors provided are initialised.
+ */
+ virtual void init() override;
+
+ /**
+ * Records the commands that are to be sent to the GPU. At this level that
+ * is only the dispatch of the algorithm component, using the mX, mY and mZ
+ * sizes resolved by the constructor. No memory barriers and no copies to a
+ * staging buffer are recorded here, so an operation that needs them has to
+ * override this function and record them itself, in addition to (or
+ * instead of) the dispatch.
+ */
+ virtual void record() override;
+
+ /**
+ * Executes after the recorded commands are submitted. At this level it
+ * only logs; derived classes can override it, for example to retrieve
+ * the output data.
+ */
+ virtual void postSubmit() override;
+
+ protected:
+ // -------------- NEVER OWNED RESOURCES
+
+ // -------------- OPTIONALLY OWNED RESOURCES
+ std::shared_ptr mAlgorithm;
+ bool mFreeAlgorithm = false;
+
+ // -------------- ALWAYS OWNED RESOURCES
+ uint32_t mX; ///< Dispatch size in x (not set by the base constructor)
+ uint32_t mY; ///< Dispatch size in y (not set by the base constructor)
+ uint32_t mZ; ///< Dispatch size in z (not set by the base constructor)
+
+ std::string mOptSpirvBinPath; ///< Optional path of the file that fetchSpirvBinaryData() reads; derived classes set it before the data is fetched
+ // Reads the file at mOptSpirvBinPath and returns its contents
+ virtual std::vector fetchSpirvBinaryData();
+};
+
+} // End namespace kp
+
+// Including implementation for template class
+#ifndef OPALGOBASE_IMPL
+#define OPALGOBASE_IMPL
+
+namespace kp {
+
+template
+OpAlgoBase::OpAlgoBase()
+{
+    // Only logs: mAlgorithm stays empty and mX/mY/mZ are left unset here
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor base");
+}
+
+template
+OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice,
+                       std::shared_ptr device,
+                       std::shared_ptr commandBuffer,
+                       std::vector>& tensors)
+  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params");
+
+    // Explicit template parameters win; otherwise X is the first tensor's size
+    if (tX > 0) {
+        this->mX = tX;
+        this->mY = tY > 0 ? tY : 1; // y and z fall back to 1 when not positive
+        this->mZ = tZ > 0 ? tZ : 1;
+    } else {
+        // at() throws std::out_of_range on an empty list (operator[] was UB)
+        // TODO: Fully support the full size dispatch using size for the shape
+        this->mX = tensors.at(0)->size();
+        this->mY = 1;
+        this->mZ = 1;
+    }
+    spdlog::info("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}",
+                 this->mX, this->mY, this->mZ);
+
+    this->mAlgorithm = std::make_shared(device, commandBuffer);
+}
+
+template
+OpAlgoBase::~OpAlgoBase()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase destructor started");
+}
+
+template
+std::vector OpAlgoBase::fetchSpirvBinaryData()
+{
+    SPDLOG_WARN(
+      "Kompute OpAlgoBase Running shaders directly from spirv file");
+
+    std::ifstream fileStream(this->mOptSpirvBinPath,
+                             std::ios::binary | std::ios::in | std::ios::ate);
+    if (!fileStream) {
+        throw std::ios_base::failure("Kompute OpAlgoBase cannot open " +
+                                     this->mOptSpirvBinPath);
+    }
+
+    // The stream was opened at the end (ios::ate): tellg() is the byte count
+    size_t shaderFileSize = fileStream.tellg();
+    fileStream.seekg(0, std::ios::beg);
+    // The vector owns the buffer, so nothing leaks (was a raw new[])
+    std::vector shaderData(shaderFileSize, '\0');
+    fileStream.read(shaderData.data(), shaderFileSize);
+
+    return shaderData;
+}
+
+template
+void
+OpAlgoBase::init()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase init called");
+    // Base behaviour: load the data from mOptSpirvBinPath and hand it over
+    std::vector shaderFileData = this->fetchSpirvBinaryData();
+
+    this->mAlgorithm->init(shaderFileData, this->mTensors);
+}
+
+template
+void
+OpAlgoBase::record()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase record called");
+    // Only the dispatch is recorded at this level
+    this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
+}
+
+template
+void
+OpAlgoBase::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
+}
+
+}
+
+#endif // #ifndef OPALGOBASE_IMPL
+
diff --git a/src/include/kompute/operations/OpCreateTensor.hpp b/src/include/kompute/operations/OpCreateTensor.hpp
index a9f8f1f08..e7f7320af 100644
--- a/src/include/kompute/operations/OpCreateTensor.hpp
+++ b/src/include/kompute/operations/OpCreateTensor.hpp
@@ -31,8 +31,7 @@ class OpCreateTensor : public OpBase
OpCreateTensor(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors = true);
+ std::vector>& tensors);
/**
* Default destructor which in this case expects the parent class to free
diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp
index aec4efa50..fdc7a3282 100644
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@@ -4,12 +4,14 @@
#include "kompute/Core.hpp"
+#if RELEASE
#include "kompute/shaders/shaderopmult.hpp"
+#endif
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
-#include "kompute/operations/OpBase.hpp"
+#include "kompute/operations/OpAlgoBase.hpp"
namespace kp {
@@ -20,7 +22,7 @@ namespace kp {
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
*/
template
-class OpMult : public OpBase
+class OpMult : public OpAlgoBase
{
public:
/**
@@ -42,8 +44,7 @@ class OpMult : public OpBase
OpMult(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors = false);
+ std::vector>& tensors);
/**
* Default destructor, which is in charge of destroying the algorithm
@@ -82,16 +83,8 @@ class OpMult : public OpBase
std::shared_ptr mTensorRHS;
std::shared_ptr mTensorOutput;
- // -------------- OPTIONALLY OWNED RESOURCES
- std::shared_ptr mAlgorithm;
- bool mFreeAlgorithm = false;
-
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr mTensorOutputStaging;
-
- uint32_t mX;
- uint32_t mY;
- uint32_t mZ;
};
} // End namespace kp
@@ -112,13 +105,10 @@ template
OpMult::OpMult(std::shared_ptr physicalDevice,
std::shared_ptr device,
std::shared_ptr commandBuffer,
- std::vector>& tensors,
- bool freeTensors)
- : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors)
+ std::vector>& tensors)
+ : OpAlgoBase(physicalDevice, device, commandBuffer, tensors)
{
SPDLOG_DEBUG("Kompute OpMult constructor with params");
-
- this->mAlgorithm = std::make_shared(device, commandBuffer);
}
template
@@ -144,24 +134,6 @@ OpMult::init()
this->mTensorRHS = this->mTensors[1];
this->mTensorOutput = this->mTensors[2];
- // The dispatch size is set up based on either explicitly provided template
- // parameters or by default it would take the shape and size of the tensors
- if (tX > 0) {
- // If at least the x value is provided we use mainly the parameters
- // provided
- this->mX = tX;
- this->mY = tY > 0 ? tY : 1;
- this->mZ = tZ > 0 ? tZ : 1;
- } else {
- // TODO: Fully support the full size dispatch using size for the shape
- this->mX = this->mTensorLHS->size();
- this->mY = 1;
- this->mZ = 1;
- }
- spdlog::info("Kompute OpMult dispatch size X: {}, Y: {}, Z: {}",
- this->mX,
- this->mY,
- this->mZ);
// TODO: Explore adding a validate function
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
@@ -196,22 +168,8 @@ OpMult::init()
shader_data::shaders_glsl_opmult_comp_spv +
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
#else
- SPDLOG_DEBUG(
- "Kompute OpMult Running debug loading shaders directly from spirv file");
-
- // TODO: Move to utility function
- std::string shaderFilePath = "shaders/glsl/opmult.comp.spv";
- std::ifstream fileStream(shaderFilePath,
- std::ios::binary | std::ios::in | std::ios::ate);
-
- size_t shaderFileSize = fileStream.tellg();
- fileStream.seekg(0, std::ios::beg);
- char* shaderDataRaw = new char[shaderFileSize];
- fileStream.read(shaderDataRaw, shaderFileSize);
- fileStream.close();
-
- std::vector shaderFileData(shaderDataRaw,
- shaderDataRaw + shaderFileSize);
+ this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
+ std::vector shaderFileData = this->fetchSpirvBinaryData();
#endif
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");