Added baseline functionality including new memory models

This commit is contained in:
Alejandro Saucedo 2021-02-24 22:26:02 +00:00
parent 9aae5d69db
commit 635fdb02be
22 changed files with 283 additions and 1919 deletions

View file

@ -69,147 +69,6 @@ class Manager
*/
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
/**
* Function that evaluates operation against named sequence.
*
* @param tensors The tensors to be used in the operation recorded
* @param sequenceName The name of the sequence to be retrieved or created
* @param TArgs Template parameters that will be used to initialise
* Operation to allow for extensible configurations on initialisation
*/
template<typename T, typename... TArgs>
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors,
            std::string sequenceName,
            TArgs&&... params)
{
    // Runs a single operation of type T on the sequence returned by
    // sequence(sequenceName): begin -> record<T> -> end -> eval, logging
    // every stage along the way.
    KP_LOG_DEBUG("Kompute Manager evalOp triggered");
    const std::shared_ptr<kp::Sequence> targetSequence =
      this->sequence(sequenceName);

    KP_LOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
    targetSequence->begin();

    KP_LOG_DEBUG("Kompute Manager evalOp running sequence RECORD");
    targetSequence->record<T>(tensors, std::forward<TArgs>(params)...);

    KP_LOG_DEBUG("Kompute Manager evalOp running sequence END");
    targetSequence->end();

    KP_LOG_DEBUG("Kompute Manager evalOp running sequence EVAL");
    targetSequence->eval();

    KP_LOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS");
}
/**
* Function that evaluates operation against a newly created sequence.
*
* @param tensors The tensors to be used in the operation recorded
* @param TArgs Template parameters that will be used to initialise
* Operation to allow for extensible configurations on initialisation
*/
template<typename T, typename... TArgs>
void evalOpDefault(std::vector<std::shared_ptr<Tensor>> tensors,
                   TArgs&&... params)
{
    KP_LOG_DEBUG("Kompute Manager evalOp Default triggered");
    // The sequence counter is advanced before delegating to evalOp with
    // the default session name.
    ++this->mCurrentSequenceIndex;
    this->evalOp<T>(
      tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
* Function that evaluates operation against named sequence asynchronously.
*
* @param tensors The tensors to be used in the operation recorded
* @param sequenceName The name of the sequence to be retrieved or created
* @param params Template parameters that will be used to initialise
* Operation to allow for extensible configurations on initialisation
*/
template<typename T, typename... TArgs>
void evalOpAsync(std::vector<std::shared_ptr<Tensor>> tensors,
                 std::string sequenceName,
                 TArgs&&... params)
{
    // Same pipeline as the synchronous variant (begin -> record<T> -> end)
    // but the final step calls evalAsync() on the sequence instead of eval().
    KP_LOG_DEBUG("Kompute Manager evalOpAsync triggered");
    const std::shared_ptr<kp::Sequence> targetSequence =
      this->sequence(sequenceName);

    KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
    targetSequence->begin();

    KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD");
    targetSequence->record<T>(tensors, std::forward<TArgs>(params)...);

    KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence END");
    targetSequence->end();

    KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
    targetSequence->evalAsync();

    KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
}
/**
* Operation that evaluates operation against default sequence
* asynchronously.
*
* @param tensors The tensors to be used in the operation recorded
* @param params Template parameters that will be used to initialise
* Operation to allow for extensible configurations on initialisation
*/
template<typename T, typename... TArgs>
void evalOpAsyncDefault(std::vector<std::shared_ptr<Tensor>> tensors,
                        TArgs&&... params)
{
    KP_LOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
    // The sequence counter is advanced before delegating to evalOpAsync
    // with the default session name.
    ++this->mCurrentSequenceIndex;
    this->evalOpAsync<T>(
      tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
}
/**
* Operation that awaits for named sequence to finish.
*
* @param sequenceName The name of the sequence to wait for termination
* @param waitFor The amount of time to wait before timing out
*/
void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX)
{
    KP_LOG_DEBUG("Kompute Manager evalOpAwait triggered with sequence {}",
                 sequenceName);

    const auto entry = this->mManagedSequences.find(sequenceName);

    // Unknown sequence name: log the error and do nothing else.
    if (entry == this->mManagedSequences.end()) {
        KP_LOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
        return;
    }

    // Only wait when the stored pointer is non-null and the sequence
    // reports that it is still running.
    if (std::shared_ptr<kp::Sequence> awaited = entry->second) {
        KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence "
                     "Sequence EVAL AWAIT");
        if (awaited->isRunning()) {
            awaited->evalAwait(waitFor);
        }
    }

    KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS");
}
/**
* Operation that awaits for default sequence to finish.
*
* @param waitFor The amount of time to wait before timing out
*/
void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
{
KP_LOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
}
/**
* Function that simplifies the common workflow of tensor creation and
* initialization. It will take the constructor parameters for a Tensor
@ -233,80 +92,6 @@ class Manager
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Function that simplifies the common workflow of tensor initialisation. It
* will take the constructor parameters for a Tensor and will use them to
* create a new Tensor. The tensor memory will then be managed and owned by
* the manager.
*
* @param tensors Array of tensors to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
*/
void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
bool syncDataToGPU = true);
/**
* Function that simplifies the common workflow of tensor initialisation. It
* will take the constructor parameters for a Tensor and will use them to
* create a new Tensor. The tensor memory will then be managed and owned by
* the manager.
*
* @param tensor Single tensor to rebuild
* @param syncDataToGPU Whether to sync the data to GPU memory
*/
void rebuild(std::shared_ptr<kp::Tensor> tensor,
bool syncDataToGPU = true);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single tensor.
*
* @param tensor Single tensor to destroy
*/
void destroy(std::shared_ptr<kp::Tensor> tensor);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of tensors.
*
* @param tensors Vector of tensors to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* vector of sequences. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequences Vector for shared ptrs with sequences to destroy
*/
void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* single sequence. Destroying by sequence name is more efficient
* and hence recommended instead of by object.
*
* @param sequence Single sequence to destroy
*/
void destroy(std::shared_ptr<kp::Sequence> sequence);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* sequence by name.
*
* @param sequenceName Single name of named sequence to destroy
*/
void destroy(const std::string& sequenceName);
/**
* Destroy owned Vulkan GPU resources and free GPU memory for
* sequences using vector of named sequence names.
*
* @param sequenceNames Vector of sequence names to destroy
*/
void destroy(const std::vector<std::string>& sequenceNames);
private:
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Instance> mInstance = nullptr;
@ -317,10 +102,9 @@ class Manager
bool mFreeDevice = false;
// -------------- ALWAYS OWNED RESOURCES
std::set<std::weak_ptr<Tensor>> mManagedTensors;
std::set<std::weak_ptr<Sequence>> mManagedSequences;
std::set<std::weak_ptr<Algorithm>> mManagedAlgorithms;
//std::unique_ptr<Sequence> mDefaultSequence;
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
std::vector<uint32_t> mComputeQueueFamilyIndices;
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

View file

@ -9,7 +9,7 @@ namespace kp {
/**
* Container of operations that can be sent to GPU as batch
*/
class Sequence
class Sequence: public std::enable_shared_from_this<Sequence>
{
public:
/**
@ -31,13 +31,30 @@ class Sequence
*/
~Sequence();
/**
* Record function for operation to be added to the GPU queue in batch. The
* operation provided must be an instance of a class derived from OpBase.
* This function also requires the Sequence to be recording, otherwise it will
* not be able to add the operation.
*
* @param op Shared pointer to the operation to be recorded
* @return Shared pointer to a Sequence (presumably this sequence, to allow
* calls to be chained; confirm against the implementation)
*/
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
/**
* Clear function clears all operations currently recorded and starts recording again.
*/
void clear();
/**
* Begins recording commands for commands to be submitted into the command
* buffer.
*
* @return Boolean stating whether execution was successful.
*/
bool begin();
void begin();
/**
* Ends the recording and stops recording commands when the record command
@ -45,7 +62,7 @@ class Sequence
*
* @return Boolean stating whether execution was successful.
*/
bool end();
void end();
/**
* Eval sends all the recorded and stored operations in the vector of
@ -53,7 +70,7 @@ class Sequence
*
* @return Boolean stating whether execution was successful.
*/
bool eval();
std::shared_ptr<Sequence> eval();
/**
* Eval Async sends all the recorded and stored operations in the vector of
@ -62,7 +79,7 @@ class Sequence
*
* @return Boolean stating whether execution was successful.
*/
bool evalAsync();
std::shared_ptr<Sequence> evalAsync();
/**
* Eval Await waits for the fence to finish processing and then once it
@ -71,7 +88,7 @@ class Sequence
* @param waitFor Number of milliseconds to wait before timing out.
* @return Boolean stating whether execution was successful.
*/
bool evalAwait(uint64_t waitFor = UINT64_MAX);
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
/**
* Returns true if the sequence is currently in recording activated.
@ -94,55 +111,6 @@ class Sequence
*/
void freeMemoryDestroyGPUResources();
/**
* Record function for operation to be added to the GPU queue in batch. This
* template requires classes to be derived from the OpBase class. This
* function also requires the Sequence to be recording, otherwise it will
* not be able to add the operation.
*
* @param tensors Vector of tensors to use for the operation
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
*/
template<typename T, typename... TArgs>
bool record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
{
    // Compile-time guard: only operations derived from OpBase can be
    // stored in mOperations.
    static_assert(std::is_base_of<OpBase, T>::value,
                  "Kompute Sequence record(...) template only valid with "
                  "OpBase derived classes");

    KP_LOG_DEBUG("Kompute Sequence record function started");

    // Recording is rejected (returns false) unless the sequence is
    // currently in the recording state.
    if (!this->isRecording()) {
        KP_LOG_ERROR(
          "Kompute sequence record attempted when not record BEGIN");
        return false;
    }

    KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
    // The new operation is handed to its owning smart pointer in the same
    // expression that creates it. The derived-to-base pointer conversion is
    // implicit, so no dynamic_cast is needed for the up-cast.
    std::unique_ptr<OpBase> baseOpPtr(new T(this->mPhysicalDevice,
                                            this->mDevice,
                                            this->mCommandBuffer,
                                            tensors,
                                            std::forward<TArgs>(params)...));

    KP_LOG_DEBUG(
      "Kompute Sequence running init on OpBase derived class instance");
    baseOpPtr->init();

    KP_LOG_DEBUG(
      "Kompute Sequence running record on OpBase derived class instance");
    baseOpPtr->record();

    // Ownership moves into the sequence's list of operations.
    mOperations.push_back(std::move(baseOpPtr));
    return true;
}
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
@ -158,7 +126,7 @@ class Sequence
// -------------- ALWAYS OWNED RESOURCES
vk::Fence mFence;
std::vector<std::unique_ptr<OpBase>> mOperations;
std::vector<std::shared_ptr<OpBase>> mOperations;
// State
bool mRecording = false;

View file

@ -1,77 +0,0 @@
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "kompute/shaders/shaderopmult.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*/
class OpAlgoCreate : public OpBase
{
public:
/**
* Constructor that receives the tensors and the algorithm this operation
* works with.
*
* @param tensors Tensors that are to be used in this operation
* @param algorithm Algorithm associated with this operation (presumably the
* algorithm that this operation sets up; confirm against the implementation)
*/
OpAlgoCreate(std::vector<std::shared_ptr<Tensor>> tensors,
std::shared_ptr<kp::Algorithm> algorithm);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpAlgoCreate() override;
/**
* Overrides OpBase::init.
* NOTE(review): the behaviour of this override is defined in the
* implementation file, which is not visible from this header.
*
* @param physicalDevice Vulkan physical device
* @param device Vulkan logical device
*/
virtual void init(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
* out of the shader, as well as the dispatch operation that sends the
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging buffer so it can be read by the
* host.
*
* @param commandBuffer Vulkan Command Buffer to record commands into
*/
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postEval() override;
};
} // End namespace kp

View file

@ -17,51 +17,8 @@ class OpAlgoDispatch : public OpBase
{
public:
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
* sub-components.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>> tensors,
std::shared_ptr<kp::Algorithm> algorithm);
/**
* Constructor that enables a file to be passed to the operation with
* the contents of the shader. This can be either in raw format or in
* compiled SPIR-V binary format.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format)
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<kp::Algorithm>& algorithm,
std::string shaderFilePath);
/**
* Constructor that enables raw shader data to be passed to the main operation
* which can be either in raw shader glsl code or in compiled SPIR-V binary.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpAlgoDispatch(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<kp::Algorithm>& algorithm,
const std::vector<uint32_t>& shaderDataRaw);
OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::shared_ptr<kp::Algorithm>& algorithm);
/**
* Default destructor, which is in charge of destroying the algorithm
@ -69,15 +26,6 @@ class OpAlgoDispatch : public OpBase
*/
virtual ~OpAlgoDispatch() override;
/**
* The init function is responsible for the initialisation of the algorithm
* component based on the parameters specified, and allows for extensibility
* on the options provided. Further dependent classes can perform more
* specific checks such as ensuring tensors provided are initialised, etc.
*/
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
@ -88,7 +36,6 @@ class OpAlgoDispatch : public OpBase
*/
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.
*/
@ -101,6 +48,10 @@ class OpAlgoDispatch : public OpBase
*/
virtual void postEval() override;
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
std::shared_ptr<Algorithm> mAlgorithm;
};
} // End namespace kp

View file

@ -1,78 +0,0 @@
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoCreate.hpp"
namespace kp {
/**
* Operation base class to simplify the creation of operations that require
* right hand and left hand side datapoints together with a single output.
* The expected data passed is two input tensors and one output tensor.
*/
class OpAlgoLhsRhsOut : public OpAlgoCreate
{
public:
/**
* Constructor that receives the tensors and the algorithm this operation
* works with.
*
* @param tensors Tensors that are to be used in this operation (presumably
* the LHS, RHS and output tensors, in that order; confirm against the
* implementation)
* @param algorithm Algorithm associated with this operation
*/
OpAlgoLhsRhsOut(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<Algorithm> algorithm);
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpAlgoLhsRhsOut() override;
/**
* The init function is responsible for ensuring that all of the tensors
* provided are aligned with requirements such as LHS, RHS and Output
* tensors, and creates the algorithm component which processes the
* computation.
*
* @param physicalDevice Vulkan physical device
* @param device Vulkan logical device
*/
virtual void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* This records the commands that are to be sent to the GPU. This includes
* the barriers that ensure the memory has been copied before going in and
* out of the shader, as well as the dispatch operation that sends the
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging buffer so it can be read by the
* host.
*
* @param commandBuffer Vulkan Command Buffer to record commands into
*/
virtual void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postEval() override;
protected:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
};
} // End namespace kp

View file

@ -19,25 +19,6 @@ class OpBase
{
public:
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
* sub-components.
*
* @param tensors Tensors that are to be used in this operation
* @param algorithm Algorithm that is to be used in this operation
*/
OpBase(std::vector<std::shared_ptr<Tensor>>& tensors,
std::shared_ptr<Algorithm> algorithm)
{
KP_LOG_DEBUG("Compute OpBase constructor with params");
this->mTensors = tensors;
this->mAlgorithm = algorithm;
this->mIsInit = false;
}
/**
* Default destructor for OpBase class. This OpBase destructor class should
* always be called to destroy and free owned resources unless it is
@ -46,81 +27,6 @@ class OpBase
virtual ~OpBase()
{
    KP_LOG_DEBUG("Kompute OpBase destructor started");
    // Inside a base-class destructor a virtual call resolves to the base
    // class's own version, so this runs OpBase::destroy() and not an
    // override from a derived class.
    destroy();
}
virtual std::shared_ptr<kp::Algorithm> algorithm() {
return this->mAlgorithm;
}
virtual std::vector<std::shared_ptr<kp::Tensor>> tensors()
{
    // Returns a copy of the tensor list held by this operation.
    return mTensors;
}
virtual bool isInit() {
return this->mIsInit;
}
/**
* The init function is responsible for setting up all the resources and
* should be called after the Operation has been created.
*/
// TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor)
virtual void init(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device) {
    // At least one tensor is required for any operation.
    if (this->mTensors.empty()) {
        throw std::runtime_error("Kompute OpBase init called with 0 tensors");
    }

    // Tensors are only initialised here when this operation manages them.
    if (this->mManagesTensors) {
        // Iterate by const reference to avoid copying each shared_ptr.
        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
            if (tensor->isInit()) {
                // TODO: Evaluate whether throwing runtime error or just writing error log
                throw std::runtime_error(
                  "Kompute OpTensorCreate: Tensor has already been initialized");
            }
            tensor->init(physicalDevice, device);
        }
    }

    // The algorithm is only initialised here when this operation manages it.
    if (this->mManagesAlgorithm) {
        // destroy() guards against a null algorithm; apply the same guard
        // here rather than dereferencing a null pointer.
        if (!this->mAlgorithm) {
            throw std::runtime_error(
              "Kompute OpBase init called with managed algorithm but "
              "algorithm is null");
        }
        this->mAlgorithm->init(device, this->mTensors);
    }

    // Record successful initialisation so isInit() reports it and destroy()
    // does not warn about an uninitialised operation.
    this->mIsInit = true;
}
virtual void destroy() {
    // Releases whatever this operation manages (tensors and/or algorithm)
    // and resets the initialisation flag. An uninitialised operation only
    // triggers a warning; the cleanup below is still attempted.
    if (!this->mIsInit) {
        KP_LOG_WARN("Kompute OpBase destroy called but not initialised");
    }

    if (this->mManagesTensors) {
        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
            if (!tensor->isInit()) {
                KP_LOG_WARN("Kompute OpBase attempted to free managed tensor "
                            "but tensor is not initialised");
            } else {
                KP_LOG_DEBUG("Kompute OpBase freeing tensor");
                tensor->freeMemoryDestroyGPUResources();
            }
        }
        this->mTensors.clear();
    }

    if (this->mManagesAlgorithm) {
        if (this->mAlgorithm && this->mAlgorithm->isInit()) {
            // Log message names the algorithm (previously said "tensor").
            KP_LOG_DEBUG("Kompute OpBase freeing algorithm");
            this->mAlgorithm->freeMemoryDestroyGPUResources();
        } else {
            // Trailing space keeps the two adjacent literals from running
            // together as "algorithmbut".
            KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm "
                        "but algorithm is not initialised");
        }
    }

    this->mIsInit = false;
}
/**
@ -149,16 +55,6 @@ class OpBase
* provided by the user.
*/
virtual void postEval() = 0;
protected:
// -------------- OPTIONALLY OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
bool mManagesTensors = false;
std::shared_ptr<kp::Algorithm> mAlgorithm;
bool mManagesAlgorithm = false;
// -------------- ALWAYS OWNED RESOURCES
bool mIsInit;
};
} // End namespace kp

View file

@ -4,14 +4,12 @@
#include "kompute/Core.hpp"
#if RELEASE
#include "kompute/shaders/shaderopmult.hpp"
#endif
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoCreate.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
namespace kp {
@ -19,15 +17,9 @@ namespace kp {
* Operation that performs multiplication on two tensors and outputs on third
* tensor.
*/
class OpMult : public OpAlgoCreate
class OpMult : public OpAlgoDispatch
{
public:
/**
* Base constructor, should not be used unless explicitly intended.
*/
OpMult() {
}
/**
* Default constructor with parameters that provides the bare minimum
@ -40,46 +32,30 @@ class OpMult : public OpAlgoCreate
* @param tensors Tensors that are to be used in this operation
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors,
const Workgroup& komputeWorkgroup = {})
: OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup)
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
: OpAlgoDispatch(tensors, algorithm)
{
KP_LOG_DEBUG("Kompute OpMult constructor with params");
#ifndef RELEASE
this->mShaderFilePath = "shaders/glsl/opmult.comp.spv";
#endif
}
if (tensors.size() != 3) {
    // The count must be converted with std::to_string: adding an integer
    // to a string literal offsets the char pointer instead of appending
    // the number to the message.
    throw std::runtime_error("Kompute OpMult expected 3 tensors but got " +
                             std::to_string(tensors.size()));
}
#if RELEASE
/**
* If RELEASE=1 it will be using the static version of the shader which is
* loaded using this file directly. Otherwise it should not override the function.
*/
std::vector<uint32_t> fetchSpirvBinaryData() override
{
KP_LOG_WARN(
"Kompute OpMult Running shaders directly from header");
return std::vector<uint32_t>(
std::vector<uint32_t> spirv(
(uint32_t*)shader_data::shaders_glsl_opmult_comp_spv,
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
algorithm->rebuild(tensors, spirv, Workgroup({tensors[0]->size()}));
}
#endif
/**
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
~OpMult() override {
virtual ~OpMult() override {
KP_LOG_DEBUG("Kompute OpMult destructor started");
}
};
} // End namespace kp

View file

@ -22,19 +22,13 @@ class OpTensorCopy : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorCopy(std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorCopy() override;
/**
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
*/
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
*/
@ -51,6 +45,8 @@ class OpTensorCopy : public OpBase
virtual void postEval() override;
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -1,71 +0,0 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/Algorithm.hpp"
namespace kp {
/**
* Base Operation which provides the high level interface that Kompute
* operations implement in order to perform a set of actions in the GPU.
*
* Operations can perform actions on tensors, and optionally can also own an
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
* would inherit from kp::OpBaseAlgo.
*/
class OpTensorCreate : public OpBase
{
public:
/**
* Constructor that receives the tensors this operation works with.
*
* @param tensors Tensors that are to be used in this operation
*/
OpTensorCreate(std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should
* always be called to destroy and free owned resources unless it is
* intended to destroy the resources in the parent class.
*/
virtual ~OpTensorCreate() override;
/**
* The init function is responsible for setting up all the resources and
* should be called after the Operation has been created.
*
* @param physicalDevice Vulkan physical device
* @param device Vulkan logical device
*/
virtual void init(
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* Record runs the core actions to create the tensors. For device tensors
* it records a copyCommand to move the data from the staging tensor to the
* device tensor. The mapping for staging tensors happens in the init function
* not in the record function.
*
* @param commandBuffer Vulkan Command Buffer to record commands into
*/
void record(std::shared_ptr<vk::CommandBuffer> commandBuffer) override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Performs a copy back into the main tensor to ensure that the data
* contained is the one that is now being stored in the GPU.
*/
virtual void postEval() override;
};
} // End namespace kp

View file

@ -21,19 +21,13 @@ class OpTensorSyncDevice : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorSyncDevice(std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
@ -50,6 +44,8 @@ class OpTensorSyncDevice : public OpBase
virtual void postEval() override;
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -22,19 +22,13 @@ class OpTensorSyncLocal : public OpBase
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorSyncLocal(std::vector<std::shared_ptr<Tensor>> tensors);
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device) override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
*/
@ -52,6 +46,8 @@ class OpTensorSyncLocal : public OpBase
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp