diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index 599607c43..da1eaabf0 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -6,8 +6,6 @@ #include "kompute/Manager.hpp" #include "kompute/Sequence.hpp" #include "kompute/operations/OpBase.hpp" -#include "kompute/operations/OpAlgoCreate.hpp" -#include "kompute/operations/OpAlgoLhsRhsOut.hpp" #include "kompute/operations/OpMult.hpp" #include "kompute/operations/OpTensorCopy.hpp" #include "kompute/operations/OpTensorSyncDevice.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index b366dbf09..629ff6a4b 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -871,8 +871,6 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4816; #include #include -#define KP_MAX_DIM_SIZE 1 - namespace kp { /** @@ -899,11 +897,6 @@ class Tensor eStorage = 2, ///< Type is Device memory (only) }; - /** - * Base constructor, should not be used unless explicitly intended. - */ - Tensor(); - /** * Default constructor with data provided which would be used to create the * respective vulkan buffer and memory. @@ -912,8 +905,10 @@ class Tensor * tensor * @param tensorType Type for the tensor which is of type TensorTypes */ - Tensor(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + Tensor(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they @@ -927,8 +922,8 @@ class Tensor * would only be created for the tensors of type TensorType::eDevice as * otherwise there is no need to copy from host memory. 
*/ - void init(std::shared_ptr physicalDevice, - std::shared_ptr device); + void rebuild(const std::vector& data, + TensorTypes tensorType = TensorTypes::eDevice); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -960,26 +955,13 @@ class Tensor * @return Unsigned integer representing the total number of elements */ uint32_t size(); - /** - * Returns the shape of the tensor, which includes the number of dimensions - * and the size per dimension. - * - * @return Array containing the sizes for each dimension. Zero means - * respective dimension is not active. - */ - std::array shape(); + /** * Retrieve the tensor type of the Tensor * * @return Tensor type of tensor */ TensorTypes tensorType(); - /** - * Returns true if the tensor initialisation function has been carried out - * successful, which would mean that the buffer and memory will have been - * provisioned. - */ - bool isInit(); /** * Sets / resets the vector data of the tensor. This function does not @@ -1083,9 +1065,6 @@ class Tensor TensorTypes mTensorType = TensorTypes::eDevice; - std::array mShape; - bool mIsInit = false; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); @@ -1111,6 +1090,107 @@ class Tensor namespace kp { +/** + Abstraction for compute shaders that are run on top of tensors grouped via + ParameterGroups (which group descriptorsets) +*/ +class Algorithm +{ +public: + + /** + * Default constructor for Algorithm + * + * @param device The Vulkan device to use for creating resources + * @param commandBuffer The vulkan command buffer to bind the pipeline and + * shaders + */ + Algorithm( + std::shared_ptr device, + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); + + /** + * Initialiser for the shader data provided to 
the algorithm as well as + * tensor parameters that will be used in shader. + * + * @param shaderFileData The bytes in spir-v format of the shader + * @tensorParams The Tensors to be used in the Algorithm / shader for + * @specalizationInstalces The specialization parameters to pass to the function + * processing + */ + void rebuild( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); + + bool isInit(); + + void freeMemoryDestroyGPUResources(); + + /** + * Destructor for Algorithm which is responsible for freeing and desroying + * respective pipelines and owned parameter groups. + */ + ~Algorithm(); + + /** + * Records the dispatch function with the provided template parameters or + * alternatively using the size of the tensor by default. + * + * @param x Layout X dispatch value + * @param y Layout Y dispatch value + * @param z Layout Z dispatch value + */ + void recordDispatch(std::shared_ptr commandBuffer); + + void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); + +private: + // -------------- NEVER OWNED RESOURCES + std::shared_ptr mDevice; + + // -------------- OPTIONALLY OWNED RESOURCES + std::shared_ptr mDescriptorSetLayout; + bool mFreeDescriptorSetLayout = false; + std::shared_ptr mDescriptorPool; + bool mFreeDescriptorPool = false; + std::shared_ptr mDescriptorSet; + bool mFreeDescriptorSet = false; + std::shared_ptr mShaderModule; + bool mFreeShaderModule = false; + std::shared_ptr mPipelineLayout; + bool mFreePipelineLayout = false; + std::shared_ptr mPipelineCache; + bool mFreePipelineCache = false; + std::shared_ptr mPipeline; + bool mFreePipeline = false; + + // -------------- ALWAYS OWNED RESOURCES + std::vector mSpirv; + Constants mSpecializationConstants; + Constants mPushConstants; + Workgroup mWorkgroup; + + bool mIsInit; + + // Create util functions + void createShaderModule(); + void 
createPipeline(); + + // Parameters + void createParameters(const std::vector>& tensorParams); +}; + +} // End namespace kp + +namespace kp { + /** * Base Operation which provides the high level interface that Kompute * operations implement in order to perform a set of actions in the GPU. @@ -1122,33 +1202,6 @@ namespace kp { class OpBase { public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpBase() { KP_LOG_DEBUG("Compute OpBase base constructor"); } - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - */ - OpBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - { - KP_LOG_DEBUG("Compute OpBase constructor with params"); - - this->mPhysicalDevice = physicalDevice; - this->mDevice = device; - this->mCommandBuffer = commandBuffer; - this->mTensors = tensors; - } /** * Default destructor for OpBase class. 
This OpBase destructor class should @@ -1158,37 +1211,14 @@ class OpBase virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); - - if (!this->mDevice) { - KP_LOG_WARN("Kompute OpBase destructor called with empty device"); - return; - } - - if (this->mFreeTensors) { - KP_LOG_DEBUG("Kompute OpBase freeing tensors"); - for (std::shared_ptr tensor : this->mTensors) { - if (tensor && tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } else { - KP_LOG_WARN("Kompute OpBase expected to free " - "tensor but has already been freed."); - } - } - } } - /** - * The init function is responsible for setting up all the resources and - * should be called after the Operation has been created. - */ - virtual void init() = 0; - /** * The record function is intended to only send a record command or run * commands that are expected to record operations that are to be submitted * as a batch into the GPU. */ - virtual void record() = 0; + virtual void record(std::shared_ptr commandBuffer) = 0; /** * Pre eval is called before the Sequence has called eval and submitted the commands to @@ -1209,21 +1239,6 @@ class OpBase * provided by the user. */ virtual void postEval() = 0; - - protected: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr - mPhysicalDevice; ///< Vulkan Physical Device - std::shared_ptr mDevice; ///< Vulkan Logical Device - std::shared_ptr - mCommandBuffer; ///< Vulkan Command Buffer - - // -------------- OPTIONALLY OWNED RESOURCES - std::vector> - mTensors; ///< Tensors referenced by operation that can be managed - ///< optionally by operation - bool mFreeTensors = false; ///< Explicit boolean that specifies whether the - ///< tensors are freed (if they are managed) }; } // End namespace kp @@ -1233,14 +1248,9 @@ namespace kp { /** * Container of operations that can be sent to GPU as batch */ -class Sequence +class Sequence: public std::enable_shared_from_this { public: - /** - * Base constructor for Sequence. 
Should not be used unless explicit - * intended. - */ - Sequence(); /** * Main constructor for sequence which requires core vulkan components to * generate all dependent resources. @@ -1261,10 +1271,21 @@ class Sequence ~Sequence(); /** - * Initialises sequence including the creation of the command pool and the - * command buffer. + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. */ - void init(); + std::shared_ptr record(std::shared_ptr op); + + /** + * Clear function clears all operations currently recorded and starts recording again. + */ + void clear(); /** * Begins recording commands for commands to be submitted into the command @@ -1272,7 +1293,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool begin(); + void begin(); /** * Ends the recording and stops recording commands when the record command @@ -1280,7 +1301,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool end(); + void end(); /** * Eval sends all the recorded and stored operations in the vector of @@ -1288,7 +1309,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool eval(); + std::shared_ptr eval(); /** * Eval Async sends all the recorded and stored operations in the vector of @@ -1297,7 +1318,7 @@ class Sequence * * @return Boolean stating whether execution was successful. 
*/ - bool evalAsync(); + std::shared_ptr evalAsync(); /** * Eval Await waits for the fence to finish processing and then once it @@ -1306,7 +1327,7 @@ class Sequence * @param waitFor Number of milliseconds to wait before timing out. * @return Boolean stating whether execution was successful. */ - bool evalAwait(uint64_t waitFor = UINT64_MAX); + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** * Returns true if the sequence is currently in recording activated. @@ -1323,68 +1344,12 @@ class Sequence */ bool isRunning(); - /** - * Returns true if the sequence has been successfully initialised. - * - * @return Boolean stating if sequence has been initialised. - */ - bool isInit(); - /** * Destroys and frees the GPU resources which include the buffer and memory * and sets the sequence as init=False. */ void freeMemoryDestroyGPUResources(); - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - */ - template - bool record(std::vector> tensors, TArgs&&... params) - { - static_assert(std::is_base_of::value, - "Kompute Sequence record(...) 
template only valid with " - "OpBase derived classes"); - - KP_LOG_DEBUG("Kompute Sequence record function started"); - - if (!this->isRecording()) { - KP_LOG_ERROR( - "Kompute sequence record attempted when not record BEGIN"); - return false; - } - - KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - T* op = new T(this->mPhysicalDevice, - this->mDevice, - this->mCommandBuffer, - tensors, - std::forward(params)...); - - OpBase* baseOp = dynamic_cast(op); - - std::unique_ptr baseOpPtr{ baseOp }; - - KP_LOG_DEBUG( - "Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(); - - KP_LOG_DEBUG( - "Kompute Sequence running record on OpBase derived class instance"); - baseOpPtr->record(); - - mOperations.push_back(std::move(baseOpPtr)); - - return true; - } - private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice = nullptr; @@ -1400,10 +1365,9 @@ class Sequence // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; - std::vector> mOperations; + std::vector> mOperations; // State - bool mIsInit = false; bool mRecording = false; bool mIsRunning = false; @@ -1422,8 +1386,6 @@ namespace kp { class OpTensorSyncDevice : public OpBase { public: - OpTensorSyncDevice(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. * @@ -1432,25 +1394,17 @@ class OpTensorSyncDevice : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncDevice(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorSyncDevice(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. 
*/ ~OpTensorSyncDevice() override; - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init() override; - /** * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. */ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. @@ -1463,6 +1417,8 @@ class OpTensorSyncDevice : public OpBase virtual void postEval() override; private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp @@ -1525,150 +1481,7 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr sequence( - std::string sequenceName = KP_DEFAULT_SESSION, - uint32_t queueIndex = 0); - - /** - * Function that evaluates operation against named sequence. - * - * @param tensors The tensors to be used in the operation recorded - * @param sequenceName The name of the sequence to be retrieved or created - * @param TArgs Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOp(std::vector> tensors, - std::string sequenceName, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOp triggered"); - std::shared_ptr sq = - this->sequence(sequenceName); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq->begin(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq->record(tensors, std::forward(params)...); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq->end(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq->eval(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); - } - - /** - * Function that evaluates operation against a newly created sequence. 
- * - * @param tensors The tensors to be used in the operation recorded - * @param TArgs Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpDefault(std::vector> tensors, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOp Default triggered"); - this->mCurrentSequenceIndex++; - this->evalOp( - tensors, KP_DEFAULT_SESSION, std::forward(params)...); - } - - /** - * Function that evaluates operation against named sequence asynchronously. - * - * @param tensors The tensors to be used in the operation recorded - * @param sequenceName The name of the sequence to be retrieved or created - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpAsync(std::vector> tensors, - std::string sequenceName, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOpAsync triggered"); - - std::shared_ptr sq = - this->sequence(sequenceName); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); - sq->begin(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); - sq->record(tensors, std::forward(params)...); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); - sq->end(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); - sq->evalAsync(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS"); - } - - /** - * Operation that evaluates operation against default sequence - * asynchronously. - * - * @param tensors The tensors to be used in the operation recorded - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpAsyncDefault(std::vector> tensors, - TArgs&&... 
params) - { - KP_LOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); - this->mCurrentSequenceIndex++; - this->evalOpAsync( - tensors, KP_DEFAULT_SESSION, std::forward(params)...); - } - - /** - * Operation that awaits for named sequence to finish. - * - * @param sequenceName The name of the sequence to wait for termination - * @param waitFor The amount of time to wait before timing out - */ - void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX) - { - KP_LOG_DEBUG("Kompute Manager evalOpAwait triggered with sequence {}", - sequenceName); - std::unordered_map>::iterator - found = this->mManagedSequences.find(sequenceName); - - if (found != this->mManagedSequences.end()) { - if (std::shared_ptr sq = found->second) { - KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence " - "Sequence EVAL AWAIT"); - if (sq->isRunning()) { - sq->evalAwait(waitFor); - } - } - KP_LOG_DEBUG( - "Kompute Manager evalOpAwait running sequence SUCCESS"); - } else { - KP_LOG_ERROR("Kompute Manager evalOpAwait Sequence not found"); - } - } - - /** - * Operation that awaits for default sequence to finish. - * - * @param tensors The tensors to be used in the operation recorded - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) - { - KP_LOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); - } + std::shared_ptr sequence(uint32_t queueIndex = 0); /** * Function that simplifies the common workflow of tensor creation and @@ -1686,79 +1499,12 @@ class Manager Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true); - /** - * Function that simplifies the common workflow of tensor initialisation. It - * will take the constructor parameters for a Tensor and will will us it to - * create a new Tensor. 
The tensor memory will then be managed and owned by - * the manager. - * - * @param tensors Array of tensors to rebuild - * @param syncDataToGPU Whether to sync the data to GPU memory - */ - void rebuild(std::vector> tensors, - bool syncDataToGPU = true); - - /** - * Function that simplifies the common workflow of tensor initialisation. It - * will take the constructor parameters for a Tensor and will will us it to - * create a new Tensor. The tensor memory will then be managed and owned by - * the manager. - * - * @param tensors Single tensor to rebuild - * @param syncDataToGPU Whether to sync the data to GPU memory - */ - void rebuild(std::shared_ptr tensor, - bool syncDataToGPU = true); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * single tensor. - * - * @param tensors Single tensor to rebuild - */ - void destroy(std::shared_ptr tensor); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * vector of tensors. - * - * @param tensors Single tensor to rebuild - */ - void destroy(std::vector> tensors); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * vector of sequences. Destroying by sequence name is more efficent - * and hence recommended instead of by object. - * - * @param sequences Vector for shared ptrs with sequences to destroy - */ - void destroy(std::vector> sequences); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * single sequence. Destroying by sequence name is more efficent - * and hence recommended instead of by object. - * - * @param sequences Single sequence to rebuild - */ - void destroy(std::shared_ptr sequence); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * sequence by name. - * - * @param sequenceName Single name of named sequence to destroy - */ - void destroy(const std::string& sequenceName); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * sequences using vector of named sequence names. 
- * - * @param sequenceName Vector of sequence names to destroy - */ - void destroy(const std::vector& sequenceNames); + std::shared_ptr algorithm( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); private: // -------------- OPTIONALLY OWNED RESOURCES @@ -1770,10 +1516,9 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES - std::set> mManagedTensors; - - std::unordered_map> - mManagedSequences; + std::vector> mManagedTensors; + std::vector> mManagedSequences; + std::vector> mManagedAlgorithms; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; @@ -1798,181 +1543,24 @@ class Manager namespace kp { -/** - Abstraction for compute shaders that are run on top of tensors grouped via - ParameterGroups (which group descriptorsets) -*/ -class Algorithm -{ -public: - /** - Base constructor for Algorithm. Should not be used unless explicit - intended. - */ - Algorithm(); - - /** - * Default constructor for Algorithm - * - * @param device The Vulkan device to use for creating resources - * @param commandBuffer The vulkan command buffer to bind the pipeline and - * shaders - */ - Algorithm(std::shared_ptr device, - std::shared_ptr commandBuffer, - const Constants& specializationConstants = {}); - - /** - * Initialiser for the shader data provided to the algorithm as well as - * tensor parameters that will be used in shader. - * - * @param shaderFileData The bytes in spir-v format of the shader - * @tensorParams The Tensors to be used in the Algorithm / shader for - * @specalizationInstalces The specialization parameters to pass to the function - * processing - */ - void init(const std::vector& shaderFileData, - std::vector> tensorParams); - - /** - * Destructor for Algorithm which is responsible for freeing and desroying - * respective pipelines and owned parameter groups. 
- */ - ~Algorithm(); - - /** - * Records the dispatch function with the provided template parameters or - * alternatively using the size of the tensor by default. - * - * @param x Layout X dispatch value - * @param y Layout Y dispatch value - * @param z Layout Z dispatch value - */ - void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1); - -private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mDevice; - std::shared_ptr mCommandBuffer; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mDescriptorSetLayout; - bool mFreeDescriptorSetLayout = false; - std::shared_ptr mDescriptorPool; - bool mFreeDescriptorPool = false; - std::shared_ptr mDescriptorSet; - bool mFreeDescriptorSet = false; - std::shared_ptr mShaderModule; - bool mFreeShaderModule = false; - std::shared_ptr mPipelineLayout; - bool mFreePipelineLayout = false; - std::shared_ptr mPipelineCache; - bool mFreePipelineCache = false; - std::shared_ptr mPipeline; - bool mFreePipeline = false; - - // -------------- ALWAYS OWNED RESOURCES - Constants mSpecializationConstants; - - // Create util functions - void createShaderModule(const std::vector& shaderFileData); - void createPipeline(); - - // Parameters - void createParameters(std::vector>& tensorParams); - void createDescriptorPool(); -}; - -} // End namespace kp - -namespace kp { - /** * Operation that provides a general abstraction that simplifies the use of * algorithm and parameter components which can be used with shaders. * By default it enables the user to provide a dynamic number of tensors * which are then passed as inputs. */ -class OpAlgoCreate : public OpBase +class OpAlgoDispatch : public OpBase { public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpAlgoCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. 
- * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); - - /** - * Constructor that enables a file to be passed to the operation with - * the contents of the shader. This can be either in raw format or in - * compiled SPIR-V binary format. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); - - /** - * Constructor that enables raw shader data to be passed to the main operation - * which can be either in raw shader glsl code or in compiled SPIR-V binary. 
- * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); + OpAlgoDispatch(const std::vector>& tensors, + const std::shared_ptr& algorithm); /** * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - virtual ~OpAlgoCreate() override; - - /** - * The init function is responsible for the initialisation of the algorithm - * component based on the parameters specified, and allows for extensibility - * on the options provided. Further dependent classes can perform more - * specific checks such as ensuring tensors provided are initialised, etc. - */ - virtual void init() override; + virtual ~OpAlgoDispatch() override; /** * This records the commands that are to be sent to the GPU. This includes @@ -1982,7 +1570,7 @@ class OpAlgoCreate : public OpBase * copy of the output data for the staging buffer so it can be read by the * host. */ - virtual void record() override; + virtual void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. 
@@ -1996,121 +1584,23 @@ class OpAlgoCreate : public OpBase */ virtual void postEval() override; - protected: - // -------------- NEVER OWNED RESOURCES - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mAlgorithm; - bool mFreeAlgorithm = false; - +private: // -------------- ALWAYS OWNED RESOURCES - - Workgroup mWorkgroup; - - std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoCreate to find the data automatically and load for processing - std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content - - virtual std::vector fetchSpirvBinaryData(); + std::vector> mTensors; + std::shared_ptr mAlgorithm; }; } // End namespace kp -#include - -namespace kp { - -/** - * Operation base class to simplify the creation of operations that require - * right hand and left hand side datapoints together with a single output. - * The expected data passed is two input tensors and one output tensor. - */ -class OpAlgoLhsRhsOut : public OpAlgoCreate -{ - public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpAlgoLhsRhsOut(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. 
- * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - const Workgroup& komputeWorkgroup = {}); - - /** - * Default destructor, which is in charge of destroying the algorithm - * components but does not destroy the underlying tensors - */ - virtual ~OpAlgoLhsRhsOut() override; - - /** - * The init function is responsible for ensuring that all of the tensors - * provided are aligned with requirements such as LHS, RHS and Output - * tensors, and creates the algorithm component which processes the - * computation. - */ - virtual void init() override; - - /** - * This records the commands that are to be sent to the GPU. This includes - * the barriers that ensure the memory has been copied before going in and - * out of the shader, as well as the dispatch operation that sends the - * shader processing to the gpu. This function also records the GPU memory - * copy of the output data for the staging buffer so it can be read by the - * host. - */ - virtual void record() override; - - /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. 
- */ - virtual void postEval() override; - - protected: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader - std::shared_ptr mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader - std::shared_ptr mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector -}; - -} // End namespace kp - -#include - -#if RELEASE - -#endif - namespace kp { /** * Operation that performs multiplication on two tensors and outpus on third * tensor. */ -class OpMult : public OpAlgoCreate +class OpMult : public OpAlgoDispatch { public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpMult() { - - } /** * Default constructor with parameters that provides the bare minimum @@ -2123,46 +1613,30 @@ class OpMult : public OpAlgoCreate * @param tensors Tensors that are to be used in this operation * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpMult(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - const Workgroup& komputeWorkgroup = {}) - : OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) + OpMult(std::vector> tensors, std::shared_ptr algorithm) + : OpAlgoDispatch(tensors, algorithm) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); -#ifndef RELEASE - this->mShaderFilePath = "shaders/glsl/opmult.comp.spv"; -#endif - } + if (tensors.size() != 3) { + throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + tensors.size()); + } -#if RELEASE - /** - * If RELEASE=1 it will be using the static version of the shader which is - * loaded using this file directly. Otherwise it should not override the function. 
- */ - std::vector fetchSpirvBinaryData() override - { - KP_LOG_WARN( - "Kompute OpMult Running shaders directly from header"); - - return std::vector( + std::vector spirv( (uint32_t*)shader_data::shaders_glsl_opmult_comp_spv, (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); + algorithm->rebuild(tensors, spirv, Workgroup({tensors[0]->size()})); } -#endif /** * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - ~OpMult() override { + virtual ~OpMult() override { KP_LOG_DEBUG("Kompute OpMult destructor started"); } - }; } // End namespace kp @@ -2175,8 +1649,6 @@ namespace kp { class OpTensorCopy : public OpBase { public: - OpTensorCopy(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. * @@ -2185,25 +1657,17 @@ class OpTensorCopy : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorCopy(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorCopy(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorCopy() override; - /** - * Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage. - */ - void init() override; - /** * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier. */ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. 
@@ -2216,6 +1680,8 @@ class OpTensorCopy : public OpBase virtual void postEval() override; private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp @@ -2228,8 +1694,6 @@ namespace kp { class OpTensorSyncLocal : public OpBase { public: - OpTensorSyncLocal(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. * @@ -2238,25 +1702,17 @@ class OpTensorSyncLocal : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncLocal(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorSyncLocal(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorSyncLocal() override; - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init() override; - /** * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory. */ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. 
@@ -2269,6 +1725,8 @@ class OpTensorSyncLocal : public OpBase virtual void postEval() override; private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp diff --git a/src/Manager.cpp b/src/Manager.cpp index 7bd629165..ba0249f1d 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -132,7 +132,7 @@ Manager::sequence(uint32_t queueIndex) this->mComputeQueues[queueIndex], this->mComputeQueueFamilyIndices[queueIndex]); - this->mManagedSequences.insert(sq); + this->mManagedSequences.push_back(sq); return sq; } @@ -337,10 +337,7 @@ Manager::tensor( std::shared_ptr tensor = std::make_shared( kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType)); - if (syncDataToGPU) { - this->evalOpDefault({ tensor }); - } - this->mManagedTensors.insert(tensor); + this->mManagedTensors.push_back(tensor); return tensor; } @@ -363,134 +360,9 @@ Manager::algorithm( specializationConstants, pushConstants)); - this->mManagedAlgorithms.insert(algorithm); + this->mManagedAlgorithms.push_back(algorithm); return algorithm; } -void -Manager::rebuild(std::vector> tensors, - bool syncDataToGPU) -{ - KP_LOG_DEBUG("Kompute Manager rebuild triggered"); - for (std::shared_ptr tensor : tensors) { - - // False syncData to run all tensors at once instead one by one - this->rebuild(tensor, false); - } - - if (syncDataToGPU) { - this->evalOpDefault(tensors); - } -} - -void -Manager::rebuild(std::shared_ptr tensor, - bool syncDataToGPU) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered"); - - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } - - tensor->init(this->mPhysicalDevice, this->mDevice); - - std::set>::iterator it = - this->mManagedTensors.find(tensor); - if (it == this->mManagedTensors.end()) { - this->mManagedTensors.insert(tensor); - } - - if (syncDataToGPU) { - this->evalOpDefault({ tensor }); - } -} - -void -Manager::destroy(std::shared_ptr tensor) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Tensor 
triggered"); - - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } - - // TODO: Confirm not limiting destroying tensors owned by this manager allowed - std::set>::iterator it = - this->mManagedTensors.find(tensor); - - if (it != this->mManagedTensors.end()) { - this->mManagedTensors.erase(tensor); - } -} - -void -Manager::destroy(std::vector> tensors) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Tensor triggered"); - - for (std::shared_ptr tensor : tensors) { - this->destroy(tensor); - } -} - -void -Manager::destroy(std::vector> sequences) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered"); - - for (std::shared_ptr sequence : sequences) { - this->destroy(sequence); - } -} - -void -Manager::destroy(std::shared_ptr sequence) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered"); - - // Inefficient but required to delete by value - // Depending on the amount of named sequences created may be worth creating - // a set to ensure efficient delete. - for (std::unordered_map>::iterator it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); it++) { - if (it->second == sequence) { - this->mManagedSequences.erase(it); - break; - } - } - - if (sequence->isInit()) { - sequence->freeMemoryDestroyGPUResources(); - } -} - -void -Manager::destroy(const std::string& sequenceName) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered"); - - std::unordered_map>::iterator - found = this->mManagedSequences.find(sequenceName); - - if (found != this->mManagedSequences.end()) { - // We don't call destroy(sequence) as erasing sequence by name more efficient - if (found->second->isInit()) { - found->second->freeMemoryDestroyGPUResources(); - } - this->mManagedSequences.erase(sequenceName); - } -} - -void -Manager::destroy(const std::vector& sequenceNames) -{ - KP_LOG_DEBUG("Kompute Manager rebuild Sequence triggered"); - - for (const std::string& sequenceName : sequenceNames) { - this->destroy(sequenceName); - } -} - - } 
diff --git a/src/OpAlgoCreate.cpp b/src/OpAlgoCreate.cpp deleted file mode 100644 index 008cf9bbe..000000000 --- a/src/OpAlgoCreate.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "kompute/operations/OpAlgoCreate.hpp" - -namespace kp { - -OpAlgoCreate::OpAlgoCreate(std::vector> tensors, - std::shared_ptr algorithm) - : OpBase(tensors, algorithm) -{ - KP_LOG_DEBUG("Kompute OpAlgoCreate constructor"); - - this->mManagesAlgorithm = true; - this->mManagesTensors = false; -} - -OpAlgoCreate::~OpAlgoCreate() -{ - KP_LOG_DEBUG("Kompute OpAlgoCreate destructor started"); -} - -void -OpAlgoCreate::init( - std::shared_ptr physicalDevice, - std::shared_ptr device) { - - KP_LOG_DEBUG("Kompute OpAlgoCreate init started"); - - // Explicitly calling top level function to create algo - OpBase::init(physicalDevice, device); -} - -void -OpAlgoCreate::record(std::shared_ptr commandBuffer) -{ - KP_LOG_DEBUG("Kompute OpAlgoCreate record called"); -} - -void -OpAlgoCreate::preEval() -{ - KP_LOG_DEBUG("Kompute OpAlgoCreate preEval called"); -} - -void -OpAlgoCreate::postEval() -{ - KP_LOG_DEBUG("Kompute OpAlgoCreate postSubmit called"); -} - -} diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 25d2ba519..3623fcddd 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -4,14 +4,11 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(std::vector> tensors, - std::shared_ptr algorithm) - : OpBase(tensors, algorithm) +OpAlgoDispatch::OpAlgoDispatch(const std::vector>& tensors, + const std::shared_ptr& algorithm) { KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); - this->mManagesAlgorithm = false; - this->mManagesTensors = false; } OpAlgoDispatch::~OpAlgoDispatch() @@ -19,13 +16,6 @@ OpAlgoDispatch::~OpAlgoDispatch() KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); } -void -OpAlgoDispatch::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpAlgoDispatch init called"); -} - void 
OpAlgoDispatch::record(std::shared_ptr commandBuffer) { diff --git a/src/OpAlgoLhsRhsOut.cpp b/src/OpAlgoLhsRhsOut.cpp deleted file mode 100644 index 89eb15c60..000000000 --- a/src/OpAlgoLhsRhsOut.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#pragma once - -#include "kompute/operations/OpAlgoLhsRhsOut.hpp" - -namespace kp { - -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); -} - -OpAlgoLhsRhsOut::OpAlgoLhsRhsOut( - std::vector>& tensors, - std::shared_ptr algorithm) - // The inheritance is initialised with the copyOutputData to false given that - // this depencendant class handles the transfer of data via staging buffers in - // a granular way. - : OpAlgoCreate(tensors, algorithm) -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); -} - -OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); -} - -void -OpAlgoLhsRhsOut::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - KP_LOG_WARN( - "Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. 
" - "LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size " - "LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); -} - -void -OpAlgoLhsRhsOut::record(std::shared_ptr commandBuffer) -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0], - this->mKomputeWorkgroup[1], - this->mKomputeWorkgroup[2]); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - if (this->mTensorOutput->tensorType() == Tensor::TensorTypes::eDevice) { - 
this->mTensorOutput->recordCopyFromDeviceToStaging(this->mCommandBuffer, - true); - } -} - -void -OpAlgoLhsRhsOut::postEval() -{ - KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); - - this->mTensorOutput->mapDataFromHostMemory(); -} - -} diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 8f88eeb65..374fe4ea1 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -3,42 +3,21 @@ namespace kp { -OpTensorCopy::OpTensorCopy(std::vector> tensors) - : OpBase(tensors, nullptr) +OpTensorCopy::OpTensorCopy(const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params"); - this->mManagesTensors = false; - this->mManagesAlgorithm = false; -} - -OpTensorCopy::~OpTensorCopy() -{ - KP_LOG_DEBUG("Kompute OpTensorCopy destructor started"); -} - -void -OpTensorCopy::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpTensorCopy init called"); - if (this->mTensors.size() < 2) { throw std::runtime_error( "Kompute OpTensorCopy called with less than 2 tensor"); } - for (std::shared_ptr tensor : this->mTensors) { - if (!tensor->isInit()) { - throw std::runtime_error( - "Kompute OpTensorCopy tensor parameter has not been initialized"); - } - if (tensor->tensorType() == Tensor::TensorTypes::eStorage) { - throw std::runtime_error("Kompute OpTensorCopy tensor parameter is " - "of TensorTypes::eStorage and hence " - "cannot be used to receive or pass data."); - } - } + this->mTensors = tensors; +} + +OpTensorCopy::~OpTensorCopy() +{ + KP_LOG_DEBUG("Kompute OpTensorCopy destructor started"); } void diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp deleted file mode 100644 index a343f1510..000000000 --- a/src/OpTensorCreate.cpp +++ /dev/null @@ -1,46 +0,0 @@ - -#include "kompute/operations/OpTensorCreate.hpp" - -namespace kp { - -OpTensorCreate::OpTensorCreate( - std::vector>& tensors) - : OpBase(tensors, nullptr) -{ - KP_LOG_DEBUG("Compute OpTensorCreate constructor with params"); - 
this->mManagesTensors = true; -} - -OpTensorCreate::~OpTensorCreate() -{ - KP_LOG_DEBUG("Kompute OpTensorCreate destructor started"); -} - -void -OpTensorCreate::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpTensorCreate init called"); - - OpBase::init(physicalDevice, device); -} - -void -OpTensorCreate::record(std::shared_ptr commandBuffer) -{ - KP_LOG_DEBUG("Kompute OpTensorCreate record called"); -} - -void -OpTensorCreate::preEval() -{ - KP_LOG_DEBUG("Kompute OpTensorCreate preEval called"); -} - -void -OpTensorCreate::postEval() -{ - KP_LOG_DEBUG("Kompute OpTensorCreate postEval called"); -} - -} diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 872f82365..2cdd4e443 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -4,10 +4,16 @@ namespace kp { OpTensorSyncDevice::OpTensorSyncDevice( - std::vector> tensors) - : OpBase(tensors, nullptr) + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); + + if (tensors.size() < 1) { + throw std::runtime_error( + "Kompute OpTensorSyncDevice called with less than 1 tensor"); + } + + this->mTensors = tensors; } OpTensorSyncDevice::~OpTensorSyncDevice() @@ -15,31 +21,6 @@ OpTensorSyncDevice::~OpTensorSyncDevice() KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started"); } -void -OpTensorSyncDevice::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpTensorSyncDevice called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (!tensor->isInit()) { - throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor param " - "has not been initialized"); - } - if (tensor->tensorType() == Tensor::TensorTypes::eStorage) { - KP_LOG_WARN( - "Kompute OpTensorSyncLocal tensor parameter is of type " - 
"TensorTypes::eStorage and hence cannot be used to receive or " - "pass data."); - } - } -} - void OpTensorSyncDevice::record(std::shared_ptr commandBuffer) { diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index fd98b092d..3cd022bf2 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -6,13 +6,16 @@ namespace kp { OpTensorSyncLocal::OpTensorSyncLocal( - std::vector> tensors) - : OpBase(tensors, nullptr) + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); - this->mManagesTensors = false; - this->mManagesAlgorithm = false; + if (tensors.size() < 1) { + throw std::runtime_error( + "Kompute OpTensorSyncLocal called with less than 1 tensor"); + } + + this->mTensors = tensors; } OpTensorSyncLocal::~OpTensorSyncLocal() @@ -20,25 +23,6 @@ OpTensorSyncLocal::~OpTensorSyncLocal() KP_LOG_DEBUG("Kompute OpTensorSyncLocal destructor started"); } -void -OpTensorSyncLocal::init(std::shared_ptr physicalDevice, - std::shared_ptr device) -{ - KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpTensorSyncLocal called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (!tensor->isInit()) { - throw std::runtime_error( - "Kompute OpTensorSyncLocal: Tensor has not been initialized"); - } - } -} - void OpTensorSyncLocal::record(std::shared_ptr commandBuffer) { diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 4f6596efb..20e441500 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -26,96 +26,60 @@ Sequence::~Sequence() this->freeMemoryDestroyGPUResources(); } -bool +void Sequence::begin() { KP_LOG_DEBUG("Kompute sequence called BEGIN"); if (this->isRecording()) { - KP_LOG_WARN("Kompute Sequence begin called when already recording"); - return false; + KP_LOG_DEBUG("Kompute Sequence begin called when already recording"); + return; } if (this->isRunning()) { - KP_LOG_WARN( - 
"Kompute Sequence begin called when sequence still running"); - return false; - } - - if (!this->mCommandPool) { - throw std::runtime_error("Kompute Sequence command pool is null"); - } - - if (this->mOperations.size()) { - KP_LOG_INFO("Kompute Sequence clearing previous operations"); - this->mOperations.clear(); + throw std::runtime_error("Kompute Sequence begin called when sequence still running"); } if (!this->mRecording) { KP_LOG_INFO("Kompute Sequence command recording BEGIN"); this->mCommandBuffer->begin(vk::CommandBufferBeginInfo()); this->mRecording = true; - } else { - KP_LOG_WARN("Kompute Sequence attempted to start command recording " - "but recording already started"); } - return true; } -bool +void Sequence::end() { KP_LOG_DEBUG("Kompute Sequence calling END"); if (!this->isRecording()) { KP_LOG_WARN("Kompute Sequence end called when not recording"); - return false; - } - - if (!this->mCommandPool) { - throw std::runtime_error("Kompute Sequence command pool is null"); - } - - if (this->mRecording) { + return; + } + else { KP_LOG_INFO("Kompute Sequence command recording END"); this->mCommandBuffer->end(); this->mRecording = false; - } else { - KP_LOG_WARN("Kompute Sequence attempted to end command recording but " - "recording not started"); } - return true; } -bool +std::shared_ptr Sequence::eval() { KP_LOG_DEBUG("Kompute sequence EVAL BEGIN"); - bool evalResult = this->evalAsync(); - if (!evalResult) { - KP_LOG_DEBUG("Kompute sequence EVAL FAILURE"); - return false; - } - - evalResult = this->evalAwait(); - - KP_LOG_DEBUG("Kompute sequence EVAL SUCCESS"); - - return evalResult; + return this->evalAsync()->evalAwait(); } -bool +std::shared_ptr Sequence::evalAsync() { if (this->isRecording()) { - KP_LOG_WARN("Kompute Sequence evalAsync called when still recording"); - return false; + this->end(); } if (this->mIsRunning) { - KP_LOG_WARN("Kompute Sequence evalAsync called when an eval async was " + throw std::runtime_error("Kompute Sequence evalAsync 
called when an eval async was " "called without successful wait"); - return false; } this->mIsRunning = true; @@ -134,15 +98,15 @@ Sequence::evalAsync() this->mComputeQueue->submit(1, &submitInfo, this->mFence); - return true; + return shared_from_this(); } -bool +std::shared_ptr Sequence::evalAwait(uint64_t waitFor) { if (!this->mIsRunning) { KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval"); - return false; + return shared_from_this(); } vk::Result result = @@ -153,15 +117,15 @@ Sequence::evalAwait(uint64_t waitFor) this->mIsRunning = false; if (result == vk::Result::eTimeout) { - KP_LOG_WARN("Kompute Sequence evalAwait timed out"); - return false; + KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", waitFor); + return shared_from_this(); } for (size_t i = 0; i < this->mOperations.size(); i++) { this->mOperations[i]->postEval(); } - return true; + return shared_from_this(); } bool @@ -221,6 +185,22 @@ Sequence::freeMemoryDestroyGPUResources() } +std::shared_ptr +Sequence::record(std::shared_ptr op) +{ + KP_LOG_DEBUG("Kompute Sequence record function started"); + + this->begin(); + + KP_LOG_DEBUG( + "Kompute Sequence running record on OpBase derived class instance"); + op->record(this->mCommandBuffer); + + this->mOperations.push_back(op); + + return shared_from_this(); +} + void Sequence::createCommandPool() { diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 3615d74c0..cd808952a 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -69,147 +69,6 @@ class Manager */ std::shared_ptr sequence(uint32_t queueIndex = 0); - /** - * Function that evaluates operation against named sequence. 
- * - * @param tensors The tensors to be used in the operation recorded - * @param sequenceName The name of the sequence to be retrieved or created - * @param TArgs Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOp(std::vector> tensors, - std::string sequenceName, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOp triggered"); - std::shared_ptr sq = - this->sequence(sequenceName); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq->begin(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq->record(tensors, std::forward(params)...); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq->end(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq->eval(); - - KP_LOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); - } - - /** - * Function that evaluates operation against a newly created sequence. - * - * @param tensors The tensors to be used in the operation recorded - * @param TArgs Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpDefault(std::vector> tensors, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOp Default triggered"); - this->mCurrentSequenceIndex++; - this->evalOp( - tensors, KP_DEFAULT_SESSION, std::forward(params)...); - } - - /** - * Function that evaluates operation against named sequence asynchronously. - * - * @param tensors The tensors to be used in the operation recorded - * @param sequenceName The name of the sequence to be retrieved or created - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpAsync(std::vector> tensors, - std::string sequenceName, - TArgs&&... 
params) - { - KP_LOG_DEBUG("Kompute Manager evalOpAsync triggered"); - - std::shared_ptr sq = - this->sequence(sequenceName); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); - sq->begin(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence RECORD"); - sq->record(tensors, std::forward(params)...); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence END"); - sq->end(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL"); - sq->evalAsync(); - - KP_LOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS"); - } - - /** - * Operation that evaluates operation against default sequence - * asynchronously. - * - * @param tensors The tensors to be used in the operation recorded - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - template - void evalOpAsyncDefault(std::vector> tensors, - TArgs&&... params) - { - KP_LOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); - this->mCurrentSequenceIndex++; - this->evalOpAsync( - tensors, KP_DEFAULT_SESSION, std::forward(params)...); - } - - /** - * Operation that awaits for named sequence to finish. 
- * - * @param sequenceName The name of the sequence to wait for termination - * @param waitFor The amount of time to wait before timing out - */ - void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX) - { - KP_LOG_DEBUG("Kompute Manager evalOpAwait triggered with sequence {}", - sequenceName); - std::unordered_map>::iterator - found = this->mManagedSequences.find(sequenceName); - - if (found != this->mManagedSequences.end()) { - if (std::shared_ptr sq = found->second) { - KP_LOG_DEBUG("Kompute Manager evalOpAwait running sequence " - "Sequence EVAL AWAIT"); - if (sq->isRunning()) { - sq->evalAwait(waitFor); - } - } - KP_LOG_DEBUG( - "Kompute Manager evalOpAwait running sequence SUCCESS"); - } else { - KP_LOG_ERROR("Kompute Manager evalOpAwait Sequence not found"); - } - } - - /** - * Operation that awaits for default sequence to finish. - * - * @param tensors The tensors to be used in the operation recorded - * @param params Template parameters that will be used to initialise - * Operation to allow for extensible configurations on initialisation - */ - void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) - { - KP_LOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); - } - /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor @@ -233,80 +92,6 @@ class Manager const Constants& specializationConstants = {}, const Constants& pushConstants = {}); - /** - * Function that simplifies the common workflow of tensor initialisation. It - * will take the constructor parameters for a Tensor and will will us it to - * create a new Tensor. The tensor memory will then be managed and owned by - * the manager. 
- * - * @param tensors Array of tensors to rebuild - * @param syncDataToGPU Whether to sync the data to GPU memory - */ - void rebuild(std::vector> tensors, - bool syncDataToGPU = true); - - /** - * Function that simplifies the common workflow of tensor initialisation. It - * will take the constructor parameters for a Tensor and will will us it to - * create a new Tensor. The tensor memory will then be managed and owned by - * the manager. - * - * @param tensors Single tensor to rebuild - * @param syncDataToGPU Whether to sync the data to GPU memory - */ - void rebuild(std::shared_ptr tensor, - bool syncDataToGPU = true); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * single tensor. - * - * @param tensors Single tensor to rebuild - */ - void destroy(std::shared_ptr tensor); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * vector of tensors. - * - * @param tensors Single tensor to rebuild - */ - void destroy(std::vector> tensors); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * vector of sequences. Destroying by sequence name is more efficent - * and hence recommended instead of by object. - * - * @param sequences Vector for shared ptrs with sequences to destroy - */ - void destroy(std::vector> sequences); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * single sequence. Destroying by sequence name is more efficent - * and hence recommended instead of by object. - * - * @param sequences Single sequence to rebuild - */ - void destroy(std::shared_ptr sequence); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * sequence by name. - * - * @param sequenceName Single name of named sequence to destroy - */ - void destroy(const std::string& sequenceName); - - /** - * Destroy owned Vulkan GPU resources and free GPU memory for - * sequences using vector of named sequence names. 
- * - * @param sequenceName Vector of sequence names to destroy - */ - void destroy(const std::vector& sequenceNames); - private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -317,10 +102,9 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES - std::set> mManagedTensors; - std::set> mManagedSequences; - std::set> mManagedAlgorithms; - //std::unique_ptr mDefaultSequence; + std::vector> mManagedTensors; + std::vector> mManagedSequences; + std::vector> mManagedAlgorithms; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index eeecd0a04..47827d729 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -9,7 +9,7 @@ namespace kp { /** * Container of operations that can be sent to GPU as batch */ -class Sequence +class Sequence: public std::enable_shared_from_this { public: /** @@ -31,13 +31,30 @@ class Sequence */ ~Sequence(); + /** + * Record function for operation to be added to the GPU queue in batch. The + * operation is passed in already constructed as a shared pointer. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param op Shared pointer to the already constructed operation that will + * be recorded into this Sequence + * @return Shared pointer, presumably to this Sequence - TODO confirm + */ + std::shared_ptr record(std::shared_ptr op); + + /** + * Clear function clears all operations currently recorded and starts recording again. + */ + void clear(); + /** * Begins recording commands for commands to be submitted into the command * buffer. * * @return Boolean stating whether execution was successful.
*/ - bool begin(); + void begin(); /** * Ends the recording and stops recording commands when the record command @@ -45,7 +62,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool end(); + void end(); /** * Eval sends all the recorded and stored operations in the vector of @@ -53,7 +70,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool eval(); + std::shared_ptr eval(); /** * Eval Async sends all the recorded and stored operations in the vector of @@ -62,7 +79,7 @@ class Sequence * * @return Boolean stating whether execution was successful. */ - bool evalAsync(); + std::shared_ptr evalAsync(); /** * Eval Await waits for the fence to finish processing and then once it @@ -71,7 +88,7 @@ class Sequence * @param waitFor Number of milliseconds to wait before timing out. * @return Boolean stating whether execution was successful. */ - bool evalAwait(uint64_t waitFor = UINT64_MAX); + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** * Returns true if the sequence is currently in recording activated. @@ -94,55 +111,6 @@ class Sequence */ void freeMemoryDestroyGPUResources(); - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - */ - template - bool record(std::vector> tensors, TArgs&&... params) - { - static_assert(std::is_base_of::value, - "Kompute Sequence record(...) 
template only valid with " - "OpBase derived classes"); - - KP_LOG_DEBUG("Kompute Sequence record function started"); - - if (!this->isRecording()) { - KP_LOG_ERROR( - "Kompute sequence record attempted when not record BEGIN"); - return false; - } - - KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - T* op = new T(this->mPhysicalDevice, - this->mDevice, - this->mCommandBuffer, - tensors, - std::forward(params)...); - - OpBase* baseOp = dynamic_cast(op); - - std::unique_ptr baseOpPtr{ baseOp }; - - KP_LOG_DEBUG( - "Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(); - - KP_LOG_DEBUG( - "Kompute Sequence running record on OpBase derived class instance"); - baseOpPtr->record(); - - mOperations.push_back(std::move(baseOpPtr)); - - return true; - } - private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice = nullptr; @@ -158,7 +126,7 @@ class Sequence // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; - std::vector> mOperations; + std::vector> mOperations; // State bool mRecording = false; diff --git a/src/include/kompute/operations/OpAlgoCreate.hpp b/src/include/kompute/operations/OpAlgoCreate.hpp deleted file mode 100644 index 3f5c859a2..000000000 --- a/src/include/kompute/operations/OpAlgoCreate.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include - -#include "kompute/Core.hpp" - -#include "kompute/shaders/shaderopmult.hpp" - -#include "kompute/Algorithm.hpp" -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpBase.hpp" - -namespace kp { - -/** - * Operation that provides a general abstraction that simplifies the use of - * algorithm and parameter components which can be used with shaders. - * By default it enables the user to provide a dynamic number of tensors - * which are then passed as inputs. 
- */ -class OpAlgoCreate : public OpBase -{ - public: - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoCreate(std::vector> tensors, - std::shared_ptr algorithm); - - /** - * Default destructor, which is in charge of destroying the algorithm - * components but does not destroy the underlying tensors - */ - virtual ~OpAlgoCreate() override; - - - virtual void init( - std::shared_ptr physicalDevice, - std::shared_ptr device) override; - - /** - * This records the commands that are to be sent to the GPU. This includes - * the barriers that ensure the memory has been copied before going in and - * out of the shader, as well as the dispatch operation that sends the - * shader processing to the gpu. This function also records the GPU memory - * copy of the output data for the staging buffer so it can be read by the - * host. - */ - virtual void record(std::shared_ptr commandBuffer) override; - - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. 
- */ - virtual void postEval() override; -}; - -} // End namespace kp - diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 7763aa9b9..0af5b5fba 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -17,51 +17,8 @@ class OpAlgoDispatch : public OpBase { public: - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoDispatch(std::vector> tensors, - std::shared_ptr algorithm); - - /** - * Constructor that enables a file to be passed to the operation with - * the contents of the shader. This can be either in raw format or in - * compiled SPIR-V binary format. 
- * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoDispatch(std::vector>& tensors, - std::shared_ptr& algorithm, - std::string shaderFilePath); - - /** - * Constructor that enables raw shader data to be passed to the main operation - * which can be either in raw shader glsl code or in compiled SPIR-V binary. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoDispatch(std::vector>& tensors, - std::shared_ptr& algorithm, - const std::vector& shaderDataRaw); + OpAlgoDispatch(const std::vector>& tensors, + const std::shared_ptr& algorithm); /** * Default destructor, which is in charge of destroying the algorithm @@ -69,15 +26,6 @@ class OpAlgoDispatch : public OpBase */ virtual ~OpAlgoDispatch() override; - /** - * The init function is responsible for the initialisation of the algorithm - * component based on the parameters specified, and allows for extensibility - * on the options provided. Further dependent classes can perform more - * specific checks such as ensuring tensors provided are initialised, etc. 
- */ - virtual void init(std::shared_ptr physicalDevice, - std::shared_ptr device) override; - /** * This records the commands that are to be sent to the GPU. This includes * the barriers that ensure the memory has been copied before going in and @@ -88,7 +36,6 @@ class OpAlgoDispatch : public OpBase */ virtual void record(std::shared_ptr commandBuffer) override; - /** * Does not perform any preEval commands. */ @@ -101,6 +48,10 @@ class OpAlgoDispatch : public OpBase */ virtual void postEval() override; +private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; + std::shared_ptr mAlgorithm; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp deleted file mode 100644 index 65cdf14a1..000000000 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once - -#include - -#include "kompute/Core.hpp" - -#include "kompute/Algorithm.hpp" -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpAlgoCreate.hpp" - -namespace kp { - -/** - * Operation base class to simplify the creation of operations that require - * right hand and left hand side datapoints together with a single output. - * The expected data passed is two input tensors and one output tensor. - */ -class OpAlgoLhsRhsOut : public OpAlgoCreate -{ - public: - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. 
- * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors - * @param komputeWorkgroup Optional parameter to specify the layout for processing - */ - OpAlgoLhsRhsOut(std::vector>& tensors, - std::shared_ptr algorithm); - - /** - * Default destructor, which is in charge of destroying the algorithm - * components but does not destroy the underlying tensors - */ - virtual ~OpAlgoLhsRhsOut() override; - - /** - * The init function is responsible for ensuring that all of the tensors - * provided are aligned with requirements such as LHS, RHS and Output - * tensors, and creates the algorithm component which processes the - * computation. - */ - virtual void init(std::shared_ptr physicalDevice, - std::shared_ptr device) override; - - /** - * This records the commands that are to be sent to the GPU. This includes - * the barriers that ensure the memory has been copied before going in and - * out of the shader, as well as the dispatch operation that sends the - * shader processing to the gpu. This function also records the GPU memory - * copy of the output data for the staging buffer so it can be read by the - * host. - */ - virtual void record(std::shared_ptr commandBuffer) override; - - /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. 
- */ - virtual void postEval() override; - - protected: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader - std::shared_ptr mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader - std::shared_ptr mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector -}; - -} // End namespace kp - diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index f54d01390..fd628cf02 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -19,25 +19,6 @@ class OpBase { public: - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - */ - OpBase(std::vector>& tensors, - std::shared_ptr algorithm) - { - KP_LOG_DEBUG("Compute OpBase constructor with params"); - this->mTensors = tensors; - this->mAlgorithm = algorithm; - this->mIsInit = false; - } - /** * Default destructor for OpBase class. 
This OpBase destructor class should * always be called to destroy and free owned resources unless it is @@ -46,81 +27,6 @@ class OpBase virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); - this->destroy(); - } - - virtual std::shared_ptr algorithm() { - return this->mAlgorithm; - } - - virtual std::vector> tensors() { - return this->mTensors; - } - - virtual bool isInit() { - return this->mIsInit; - } - - /** - * The init function is responsible for setting up all the resources and - * should be called after the Operation has been created. - */ - // TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor) - virtual void init( - std::shared_ptr physicalDevice, - std::shared_ptr device) { - - if (this->mTensors.size() < 1) { - throw std::runtime_error("Kompute OpBase init called with 0 tensors"); - } - - if (this->mManagesTensors) { - for (std::shared_ptr tensor : this->mTensors) { - if (tensor->isInit()) { - // TODO: Evaluate whether throwing runtime error or just writing error log - throw std::runtime_error( - "Kompute OpTensorCreate: Tensor has already been initialized"); - } - else { - tensor->init(physicalDevice, device); - } - } - } - - if (this->mManagesAlgorithm) { - this->mAlgorithm->init(device, this->mTensors); - } - } - - virtual void destroy() { - if (!this->mIsInit) { - KP_LOG_WARN("Kompute OpBase destroy called but not initialised"); - } - - if (this->mManagesTensors) { - for (const std::shared_ptr& tensor : this->mTensors) { - if (!tensor->isInit()) { - KP_LOG_WARN("Kompute OpBase attempted to free managed tensor " - "but tensor is not initialised"); - } else { - KP_LOG_DEBUG("Kompute OpBase freeing tensor"); - tensor->freeMemoryDestroyGPUResources(); - } - } - this->mTensors.clear(); - } - - if (this->mManagesAlgorithm) { - if (this->mAlgorithm && this->mAlgorithm->isInit()) { - KP_LOG_DEBUG("Kompute OpBase freeing tensor"); - this->mAlgorithm->freeMemoryDestroyGPUResources(); - } else { - 
KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm" - "but algorithm is not initialised"); - } - } - - this->mIsInit = false; } /** @@ -149,16 +55,6 @@ class OpBase * provided by the user. */ virtual void postEval() = 0; - - protected: - // -------------- OPTIONALLY OWNED RESOURCES - std::vector> mTensors; - bool mManagesTensors = false; - std::shared_ptr mAlgorithm; - bool mManagesAlgorithm = false; - - // -------------- ALWAYS OWNED RESOURCES - bool mIsInit; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 485210f0a..184a30cd9 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -4,14 +4,12 @@ #include "kompute/Core.hpp" -#if RELEASE #include "kompute/shaders/shaderopmult.hpp" -#endif #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" -#include "kompute/operations/OpAlgoCreate.hpp" +#include "kompute/operations/OpAlgoDispatch.hpp" namespace kp { @@ -19,15 +17,9 @@ namespace kp { * Operation that performs multiplication on two tensors and outpus on third * tensor. */ -class OpMult : public OpAlgoCreate +class OpMult : public OpAlgoDispatch { public: - /** - * Base constructor, should not be used unless explicitly intended. 
- */ - OpMult() { - - } /** * Default constructor with parameters that provides the bare minimum @@ -40,46 +32,30 @@ class OpMult : public OpAlgoCreate * @param tensors Tensors that are to be used in this operation * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpMult(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - const Workgroup& komputeWorkgroup = {}) - : OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) + OpMult(std::vector> tensors, std::shared_ptr algorithm) + : OpAlgoDispatch(tensors, algorithm) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); -#ifndef RELEASE - this->mShaderFilePath = "shaders/glsl/opmult.comp.spv"; -#endif - } + if (tensors.size() != 3) { + throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + std::to_string(tensors.size())); + } -#if RELEASE - /** - * If RELEASE=1 it will be using the static version of the shader which is - * loaded using this file directly. Otherwise it should not override the function.
- */ - std::vector fetchSpirvBinaryData() override - { - KP_LOG_WARN( - "Kompute OpMult Running shaders directly from header"); - - return std::vector( + std::vector spirv( (uint32_t*)shader_data::shaders_glsl_opmult_comp_spv, (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv + kp::shader_data::shaders_glsl_opmult_comp_spv_len)); + algorithm->rebuild(tensors, spirv, Workgroup({tensors[0]->size()})); } -#endif /** * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - ~OpMult() override { + virtual ~OpMult() override { KP_LOG_DEBUG("Kompute OpMult destructor started"); } - }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorCopy.hpp b/src/include/kompute/operations/OpTensorCopy.hpp index d35139e8c..01fad8334 100644 --- a/src/include/kompute/operations/OpTensorCopy.hpp +++ b/src/include/kompute/operations/OpTensorCopy.hpp @@ -22,19 +22,13 @@ class OpTensorCopy : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorCopy(std::vector> tensors); + OpTensorCopy(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorCopy() override; - /** - * Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage. - */ - void init(std::shared_ptr physicalDevice, - std::shared_ptr device) override; - /** * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier. 
*/ @@ -51,6 +45,8 @@ class OpTensorCopy : public OpBase virtual void postEval() override; private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp deleted file mode 100644 index b4ac80862..000000000 --- a/src/include/kompute/operations/OpTensorCreate.hpp +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -#include "kompute/Core.hpp" - -#include "kompute/operations/OpBase.hpp" -#include "kompute/Tensor.hpp" -#include "kompute/Algorithm.hpp" - -namespace kp { - -/** - * Base Operation which provides the high level interface that Kompute - * operations implement in order to perform a set of actions in the GPU. - * - * Operations can perform actions on tensors, and optionally can also own an - * Algorithm with respective parameters. kp::Operations with kp::Algorithms - * would inherit from kp::OpBaseAlgo. - */ -class OpTensorCreate : public OpBase -{ - public: - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation - */ - OpTensorCreate(std::vector>& tensors); - - /** - * Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should - * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. - */ - virtual ~OpTensorCreate() override; - - /** - * The init function is responsible for setting up all the resources and - * should be called after the Operation has been created. 
- */ - virtual void init( - std::shared_ptr physicalDevice, - std::shared_ptr device) override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record(std::shared_ptr commandBuffer) override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - -}; - -} // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index 35e97a475..8addce188 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -21,19 +21,13 @@ class OpTensorSyncDevice : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncDevice(std::vector> tensors); + OpTensorSyncDevice(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorSyncDevice() override; - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init(std::shared_ptr physicalDevice, - std::shared_ptr device) override; - /** * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. 
*/ @@ -50,6 +44,8 @@ class OpTensorSyncDevice : public OpBase virtual void postEval() override; private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp index eebdd7084..7df8ccdd7 100644 --- a/src/include/kompute/operations/OpTensorSyncLocal.hpp +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -22,19 +22,13 @@ class OpTensorSyncLocal : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncLocal(std::vector> tensors); + OpTensorSyncLocal(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorSyncLocal() override; - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init(std::shared_ptr physicalDevice, - std::shared_ptr device) override; - /** * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory. */ @@ -52,6 +46,8 @@ class OpTensorSyncLocal : public OpBase private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; }; } // End namespace kp