diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 67efbe708..b202ab580 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1793,523 +1793,3 @@ class OpMult : public OpAlgoDispatch }; } // End namespace kp - -// SPDX-License-Identifier: Apache-2.0 - -namespace kp { - -/** - * Container of operations that can be sent to GPU as batch - */ -class Sequence : public std::enable_shared_from_this -{ - public: - /** - * Main constructor for sequence which requires core vulkan components to - * generate all dependent resources. - * - * @param physicalDevice Vulkan physical device - * @param device Vulkan logical device - * @param computeQueue Vulkan compute queue - * @param queueIndex Vulkan compute queue index in device - * @param totalTimestamps Maximum number of timestamps to allocate - */ - Sequence(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr computeQueue, - uint32_t queueIndex, - uint32_t totalTimestamps = 0); - /** - * Destructor for sequence which is responsible for cleaning all subsequent - * owned operations. - */ - ~Sequence(); - - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param op Object derived from kp::BaseOp that will be recoreded by the - * sequence which will be used when the operation is evaluated. - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr record(std::shared_ptr op); - - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr record( - std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->record(op); - } - /** - * Record function for operation to be added to the GPU queue in batch. This - * template requires classes to be derived from the OpBase class. This - * function also requires the Sequence to be recording, otherwise it will - * not be able to add the operation. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr record(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->record(op); - } - - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job synchronously (with a barrier). - * - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr eval(); - - /** - * Resets all the recorded and stored operations, records the operation - * provided and submits into the gpu as a submit job synchronously (with a - * barrier). - * - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr eval(std::shared_ptr op); - - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr eval(std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->eval(op); - } - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr eval(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->eval(op); - } - - /** - * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() - * must ALWAYS be called after to ensure the sequence is terminated - * correctly. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAsync(); - /** - * Clears currnet operations to record provided one in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() - * must ALWAYS be called after to ensure the sequence is terminated - * correctly. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAsync(std::shared_ptr op); - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param tensors Vector of tensors to use for the operation - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr evalAsync( - std::vector> tensors, - TArgs&&... params) - { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - return this->evalAsync(op); - } - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @param algorithm Algorithm to use for the record often used for OpAlgo - * operations - * @param TArgs Template parameters that are used to initialise operation - * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself - */ - template - std::shared_ptr evalAsync(std::shared_ptr algorithm, - TArgs&&... params) - { - std::shared_ptr op{ new T(algorithm, - std::forward(params)...) }; - return this->evalAsync(op); - } - - /** - * Eval Await waits for the fence to finish processing and then once it - * finishes, it runs the postEval of all operations. - * - * @param waitFor Number of milliseconds to wait before timing out. - * @return shared_ptr of the Sequence class itself - */ - std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); - - /** - * Clear function clears all operations currently recorded and starts - * recording again. - */ - void clear(); - - /** - * Return the timestamps that were latched at the beginning and - * after each operation during the last eval() call. - */ - std::vector getTimestamps(); - - /** - * Begins recording commands for commands to be submitted into the command - * buffer. - * - * @return Boolean stating whether execution was successful. - */ - void begin(); - - /** - * Ends the recording and stops recording commands when the record command - * is sent. - * - * @return Boolean stating whether execution was successful. - */ - void end(); - - /** - * Returns true if the sequence is currently in recording activated. - * - * @return Boolean stating if recording ongoing. - */ - bool isRecording(); - - /** - * Returns true if the sequence has been initialised, and it's based on the - * GPU resources being refrenced. - * - * @return Boolean stating if is initialized - */ - bool isInit(); - - /** - * Clears command buffer and triggers re-record of all the current - * operations saved, which is useful if the underlying kp::Tensors or - * kp::Algorithms are modified and need to be re-recorded. - */ - void rerecord(); - - /** - * Returns true if the sequence is currently running - mostly used for async - * workloads. - * - * @return Boolean stating if currently running. - */ - bool isRunning(); - - /** - * Destroys and frees the GPU resources which include the buffer and memory - * and sets the sequence as init=False. - */ - void destroy(); - - private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mPhysicalDevice = nullptr; - std::shared_ptr mDevice = nullptr; - std::shared_ptr mComputeQueue = nullptr; - uint32_t mQueueIndex = -1; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mCommandPool = nullptr; - bool mFreeCommandPool = false; - std::shared_ptr mCommandBuffer = nullptr; - bool mFreeCommandBuffer = false; - - // -------------- ALWAYS OWNED RESOURCES - vk::Fence mFence; - std::vector> mOperations; - std::shared_ptr timestampQueryPool = nullptr; - - // State - bool mRecording = false; - bool mIsRunning = false; - - // Create functions - void createCommandPool(); - void createCommandBuffer(); - void createTimestampQueryPool(uint32_t totalTimestamps); -}; - -} // End namespace kp - -// SPDX-License-Identifier: Apache-2.0 - -#include -#include - -#define KP_DEFAULT_SESSION "DEFAULT" - -namespace kp { - -/** - Base orchestrator which creates and manages device and child components -*/ -class Manager -{ - public: - /** - Base constructor and default used which creates the base resources - including choosing the device 0 by default. - */ - Manager(); - - /** - * Similar to base constructor but allows for further configuration to use - * when creating the Vulkan resources. - * - * @param physicalDeviceIndex The index of the physical device to use - * @param familyQueueIndices (Optional) List of queue indices to add for - * explicit allocation - * @param desiredExtensions The desired extensions to load from - * physicalDevice - */ - Manager(uint32_t physicalDeviceIndex, - const std::vector& familyQueueIndices = {}, - const std::vector& desiredExtensions = {}); - - /** - * Manager constructor which allows your own vulkan application to integrate - * with the kompute use. - * - * @param instance Vulkan compute instance to base this application - * @param physicalDevice Vulkan physical device to use for application - * @param device Vulkan logical device to use for all base resources - * @param physicalDeviceIndex Index for vulkan physical device used - */ - Manager(std::shared_ptr instance, - std::shared_ptr physicalDevice, - std::shared_ptr device); - - /** - * Manager destructor which would ensure all owned resources are destroyed - * unless explicitly stated that resources should not be destroyed or freed. - */ - ~Manager(); - - /** - * Create a managed sequence that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. - * - * @param queueIndex The queue to use from the available queues - * @param nrOfTimestamps The maximum number of timestamps to allocate. - * If zero (default), disables latching of timestamps. - * @returns Shared pointer with initialised sequence - */ - std::shared_ptr sequence(uint32_t queueIndex = 0, - uint32_t totalTimestamps = 0); - - /** - * Create a managed tensor that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. - * - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize - * @returns Shared pointer with initialised tensor - */ - template - std::shared_ptr> tensorT( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - - std::shared_ptr> tensor{ new kp::TensorT( - this->mPhysicalDevice, this->mDevice, data, tensorType) }; - - if (this->mManageResources) { - this->mManagedTensors.push_back(tensor); - } - - return tensor; - } - - std::shared_ptr> tensor( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - return this->tensorT(data, tensorType); - } - - std::shared_ptr tensor( - void* data, - uint32_t elementTotalCount, - uint32_t elementMemorySize, - const Tensor::TensorDataTypes& dataType, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, - this->mDevice, - data, - elementTotalCount, - elementMemorySize, - dataType, - tensorType) }; - - if (this->mManageResources) { - this->mManagedTensors.push_back(tensor); - } - - return tensor; - } - - std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const std::vector& specializationConstants = {}, - const std::vector& pushConstants = {}) - { - return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants); - } - - /** - * Create a managed algorithm that will be destroyed by this manager - * if it hasn't been destroyed by its reference count going to zero. - * - * @param tensors (optional) The tensors to initialise the algorithm with - * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch - * @param workgroup (optional) kp::Workgroup for algorithm to use, and - * defaults to (tensor[0].size(), 1, 1) - * @param specializationConstants (optional) kp::Constant to use for - * specialization constants, and defaults to an empty constant - * @param pushConstants (optional) kp::Constant to use for push constants, - * and defaults to an empty constant - * @returns Shared pointer with initialised algorithm - */ - template - std::shared_ptr algorithm( - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const std::vector& specializationConstants, - const std::vector

& pushConstants) - { - - KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - - std::shared_ptr algorithm{ new kp::Algorithm( - this->mDevice, - tensors, - spirv, - workgroup, - specializationConstants, - pushConstants) }; - - if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); - } - - return algorithm; - } - - /** - * Destroy the GPU resources and all managed resources by manager. - **/ - void destroy(); - /** - * Run a pseudo-garbage collection to release all the managed resources - * that have been already freed due to these reaching to zero ref count. - **/ - void clear(); - - /** - * Information about the current device. - * - * @return vk::PhysicalDeviceProperties containing information about the device - **/ - vk::PhysicalDeviceProperties getDeviceProperties() const; - - /** - * List the devices available in the current vulkan instance. - * - * @return vector of physical devices containing their respective properties - **/ - std::vector listDevices() const; - - private: - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mInstance = nullptr; - bool mFreeInstance = false; - std::shared_ptr mPhysicalDevice = nullptr; - std::shared_ptr mDevice = nullptr; - bool mFreeDevice = false; - - // -------------- ALWAYS OWNED RESOURCES - std::vector> mManagedTensors; - std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; - - std::vector mComputeQueueFamilyIndices; - std::vector> mComputeQueues; - - bool mManageResources = false; - -#if DEBUG -#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS - vk::DebugReportCallbackEXT mDebugReportCallback; - vk::DispatchLoaderDynamic mDebugDispatcher; -#endif -#endif - - // Create functions - void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}, - uint32_t hysicalDeviceIndex = 0, - const std::vector& desiredExtensions = {}); -}; - -} // End namespace kp diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index c6099ff85..88d6e55fb 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -10,6 +10,7 @@ OpAlgoDispatch::~OpAlgoDispatch() KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); if (this->mPushConstantsData) { + KP_LOG_DEBUG("Kompute freeing push constants data"); free(this->mPushConstantsData); } }