diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 50ec9ad28..d5263628b 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -359,7 +359,7 @@ Algorithm::createPipeline() } void -Algorithm::bindCore(const vk::CommandBuffer& commandBuffer) +Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm binding pipeline"); @@ -377,7 +377,7 @@ Algorithm::bindCore(const vk::CommandBuffer& commandBuffer) } void -Algorithm::bindPush(const vk::CommandBuffer& commandBuffer) +Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { if (this->mPushConstants.size()) { KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index 3aef85e4f..44908adb3 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -38,8 +38,8 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) this->mAlgorithm->setPush(this->mPushConstants); } - this->mAlgorithm->bindCore(commandBuffer); - this->mAlgorithm->bindPush(commandBuffer); + this->mAlgorithm->recordBindCore(commandBuffer); + this->mAlgorithm->recordBindPush(commandBuffer); this->mAlgorithm->recordDispatch(commandBuffer); } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index cabc673b6..fae9cfd4b 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -14,11 +14,19 @@ class Algorithm { public: /** - * Default constructor for Algorithm + * Main constructor for algorithm with configuration parameters to create + * the underlying resources. 
* * @param device The Vulkan device to use for creating resources - * @param commandBuffer The vulkan command buffer to bind the pipeline and - * shaders + * @param tensors (optional) The tensors to use to create the descriptor resources + * @param spirv (optional) The spirv code to use to create the algorithm + * @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to + * kp::Workgroup(tensor[0].size(), 1, 1) if not set. + * @param specializationConstants (optional) The kp::Constants to use to initialize + * the specialization constants which cannot be changed once set. + * @param pushConstants (optional) The kp::Constants to use when initializing the + * pipeline, which set the size of the push constants - these can be modified but + * all new values must have the same vector size as this initial value. */ Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, @@ -28,13 +36,18 @@ class Algorithm const Constants& pushConstants = {}); /** - * Initialiser for the shader data provided to the algorithm as well as - * tensor parameters that will be used in shader. + * Rebuild function to reconstruct algorithm with configuration parameters to create + * the underlying resources. * - * @param shaderFileData The bytes in spir-v format of the shader - * @tensorParams The Tensors to be used in the Algorithm / shader for - * @specalizationInstalces The specialization parameters to pass to the - * function processing + * @param tensors The tensors to use to create the descriptor resources + * @param spirv The spirv code to use to create the algorithm + * @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to + * kp::Workgroup(tensor[0].size(), 1, 1) if not set. + * @param specializationConstants (optional) The kp::Constants to use to initialize + * the specialization constants which cannot be changed once set. 
+ * @param pushConstants (optional) The kp::Constants to use when initializing the + * pipeline, which set the size of the push constants - these can be modified but + * all new values must have the same vector size as this initial value. */ void rebuild(const std::vector>& tensors, const std::vector& spirv, @@ -52,24 +65,77 @@ class Algorithm * Records the dispatch function with the provided template parameters or * alternatively using the size of the tensor by default. * - * @param x Layout X dispatch value - * @param y Layout Y dispatch value - * @param z Layout Z dispatch value + * @param commandBuffer Command buffer to record the algorithm resources to */ void recordDispatch(const vk::CommandBuffer& commandBuffer); - void bindCore(const vk::CommandBuffer& commandBuffer); + /** + * Records command that binds the "core" algorithm components which consist of + * binding the pipeline and binding the descriptorsets. + * + * @param commandBuffer Command buffer to record the algorithm resources to + */ + void recordBindCore(const vk::CommandBuffer& commandBuffer); - void bindPush(const vk::CommandBuffer& commandBuffer); + /** + * Records command that binds the push constants to the command buffer provided + * - it is required that the pushConstants provided are of the same size as the + * ones provided during initialization. + * + * @param commandBuffer Command buffer to record the algorithm resources to + */ + void recordBindPush(const vk::CommandBuffer& commandBuffer); + /** + * function that checks all the gpu resource components to verify if these have + * been created and returns true if all are valid. + * + * @returns returns true if the algorithm is currently initialized. + */ bool isInit(); + /** + * Sets the work group to use in the recordDispatch + * + * @param workgroup The kp::Workgroup value to use to update the algorithm. 
It + * must have a value greater than 1 on the x value (index 1) otherwise it will + * be initialized on the size of the first tensor (ie. this->mTensor[0]->size()) + */ void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); + /** + * Sets the push constants to the new value provided to use in the next recordBindPush() + * + * @param pushConstants The kp::Constants to use to set the push constants to use in the next + * recordBindPush(...) calls. The constants provided must be of the same size as the + * ones created during initialization. + */ void setPush(const Constants& pushConstants); + /** + * Gets the current workgroup from the algorithm. + * + * @returns The kp::Workgroup currently set in the algorithm, which is the + * value used when recording the dispatch; see setWorkgroup(...) for how + * it is updated. + */ const Workgroup& getWorkgroup(); + /** + * Gets the specialization constants of the current algorithm. + * + * @returns The kp::Constants currently set for specialization constants + */ const Constants& getSpecializationConstants(); + /** + * Gets the push constants of the current algorithm. + * + * @returns The kp::Constants currently set for push constants + */ const Constants& getPush(); + /** + * Gets the current tensors that are used in the algorithm. + * + * @returns The list of tensors used in the algorithm. + */ const std::vector>& getTensors(); void destroy(); @@ -101,8 +167,6 @@ class Algorithm Constants mPushConstants; Workgroup mWorkgroup; - bool mIsInit; - // Create util functions void createShaderModule(); void createPipeline(); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 1e6b0adb2..957e45d2e 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -24,15 +24,13 @@ class Manager Manager(); /** - * Similar to base constructor but allows the user to provide the device - * they would like to create the resources on. 
+ * Similar to base constructor but allows for further configuration to use when + * creating the Vulkan resources. * * @param physicalDeviceIndex The index of the physical device to use - * @param manageResources (Optional) Whether to manage the memory of the - * resources created and destroy when the manager is destroyed. * @param familyQueueIndices (Optional) List of queue indices to add for * explicit allocation - * @param totalQueues The total number of compute queues to create. + * @param desiredExtensions The desired extensions to load from physicalDevice */ Manager(uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices = {}, @@ -58,32 +56,40 @@ class Manager ~Manager(); /** - * Get or create a managed Sequence that will be contained by this manager. - * If the named sequence does not currently exist, it would be created and - * initialised. + * Create a managed sequence that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. * - * @param sequenceName The name for the named sequence to be retrieved or - * created * @param queueIndex The queue to use from the available queues - * @return Shared pointer to the manager owned sequence resource + * @returns Shared pointer with initialised sequence */ std::shared_ptr sequence(uint32_t queueIndex = 0); /** - * Function that simplifies the common workflow of tensor creation and - * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it. The - * tensor memory will then be managed and owned by the manager. + * Create a managed tensor that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize - * @param syncDataToGPU Whether to sync the data to GPU memory - * @returns Initialized Tensor with memory Syncd to GPU device + * @returns Shared pointer with initialised tensor */ std::shared_ptr tensor( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); + /** + * Create a managed algorithm that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. + * + * @param tensors (optional) The tensors to initialise the algorithm with + * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch + * @param workgroup (optional) kp::Workgroup for algorithm to use, and + * defaults to (tensor[0].size(), 1, 1) + * @param specializationConstants (optional) kp::Constant to use for + * specialization constants, and defaults to an empty constant + * @param pushConstants (optional) kp::Constant to use for push constants, + * and defaults to an empty constant + * @returns Shared pointer with initialised algorithm + */ std::shared_ptr algorithm( const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -91,7 +97,14 @@ class Manager const Constants& specializationConstants = {}, const Constants& pushConstants = {}); + /** + * Destroy the GPU resources and all managed resources by manager. + **/ void destroy(); + /** + * Run a pseudo-garbage collection to release all the managed resources + * that have been already freed due to these reaching to zero ref count. 
+ **/ void clear(); private: diff --git a/src/include/kompute/Parameter.hpp b/src/include/kompute/Parameter.hpp deleted file mode 100644 index a37eb31f8..000000000 --- a/src/include/kompute/Parameter.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include "kompute/Core.hpp" - -#include "kompute/Tensor.hpp" - -namespace kp { - -class Algorithm -{ - public: - Algorithm(); - - Algorithm(std::shared_ptr device); - - void init(std::string shaderFilePath, - std::vector> tensorParams); - - ~Algorithm(); - - private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mDevice; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mDescriptorSetLayout; - bool mFreeDescriptorSetLayout = false; - std::shared_ptr mDescriptorPool; - bool mFreeDescriptorPool = false; - std::shared_ptr mDescriptorSet; - bool mFreeDescriptorSet = false; - std::shared_ptr mShaderModule; - bool mFreeShaderModule = false; - std::shared_ptr mPipelineLayout; - bool mFreePipelineLayout = false; - std::shared_ptr mPipelineCache; - bool mFreePipelineCache = false; - std::shared_ptr mPipeline; - bool mFreePipeline = false; - - // Create util functions - void createParameters(); - void createShaderModule(std::string shaderFilePath); - void createPipeline(); -}; - -} // End namespace kp diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 5741fb4e6..10aa80148 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -32,6 +32,14 @@ class Sequence : public std::enable_shared_from_this ~Sequence(); /** + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param op Object derived from kp::OpBase that will be recorded by the sequence + * which will be used when the operation is evaluated. 
+ * @return shared_ptr of the Sequence class itself */ std::shared_ptr record(std::shared_ptr op); @@ -44,6 +52,7 @@ class Sequence : public std::enable_shared_from_this * @param tensors Vector of tensors to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself */ template std::shared_ptr record( @@ -52,6 +61,18 @@ class Sequence : public std::enable_shared_from_this std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->record(op); } + /** + * Record function for operation to be added to the GPU queue in batch. This + * template requires classes to be derived from the OpBase class. This + * function also requires the Sequence to be recording, otherwise it will + * not be able to add the operation. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ template std::shared_ptr record(std::shared_ptr algorithm, TArgs&&... params) @@ -63,21 +84,29 @@ class Sequence : public std::enable_shared_from_this /** * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. + * operations into the gpu as a submit job synchronously (with a barrier). * * @return shared_ptr of the Sequence class itself */ std::shared_ptr eval(); + /** + * Resets all the recorded and stored operations, records the operation + * provided and submits into the gpu as a submit job synchronously (with a barrier). 
+ * + * @return shared_ptr of the Sequence class itself + */ std::shared_ptr eval(std::shared_ptr op); /** * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. * @return shared_ptr of the Sequence class itself */ - // TODO: Aim to have only a single function with tensors/algorithm template std::shared_ptr eval(std::vector> tensors, TArgs&&... params) @@ -85,6 +114,16 @@ class Sequence : public std::enable_shared_from_this std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->eval(op); } + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ template std::shared_ptr eval(std::shared_ptr algorithm, TArgs&&... params) @@ -96,18 +135,27 @@ class Sequence : public std::enable_shared_from_this /** * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. EvalAwait() must - * be called after to ensure the sequence is terminated correctly. + * operations into the gpu as a submit job without a barrier. EvalAwait() must + * ALWAYS be called after to ensure the sequence is terminated correctly. * * @return Boolean stating whether execution was successful. */ std::shared_ptr evalAsync(); + /** + * Clears current operations, records the provided one in the vector of + * operations and sends it into the gpu as a submit job without a barrier. 
EvalAwait() must + * ALWAYS be called after to ensure the sequence is terminated correctly. + * + * @return shared_ptr of the Sequence class itself + */ std::shared_ptr evalAsync(std::shared_ptr op); - /** * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. * + * @param tensors Vector of tensors to use for the operation + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. * @return shared_ptr of the Sequence class itself */ template @@ -118,6 +166,16 @@ class Sequence : public std::enable_shared_from_this std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->evalAsync(op); } + /** + * Eval Async sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job without a barrier. + * + * @param algorithm Algorithm to use for the record often used for OpAlgo + * operations + * @param TArgs Template parameters that are used to initialise operation + * which allows for extensible configurations on initialisation. + * @return shared_ptr of the Sequence class itself + */ template std::shared_ptr evalAsync(std::shared_ptr algorithm, TArgs&&... params) @@ -132,7 +190,7 @@ class Sequence : public std::enable_shared_from_this * finishes, it runs the postEval of all operations. * * @param waitFor Number of milliseconds to wait before timing out. - * @return Boolean stating whether execution was successful. + * @return shared_ptr of the Sequence class itself */ std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); @@ -165,8 +223,19 @@ class Sequence : public std::enable_shared_from_this */ bool isRecording(); + /** + * Returns true if the sequence has been initialised, and it's based on the + * GPU resources being referenced. 
+ * + * @return Boolean stating if is initialized + */ bool isInit(); + /** + * Clears command buffer and triggers re-record of all the current operations + * saved, which is useful if the underlying kp::Tensors or kp::Algorithms + * are modified and need to be re-recorded. + */ void rerecord(); /** diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 7b24f3de7..195af44f4 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -29,12 +29,14 @@ class Tensor }; /** - * Default constructor with data provided which would be used to create the + * Constructor with data provided which would be used to create the * respective vulkan buffer and memory. * + * @param physicalDevice The physical device to use to fetch properties + * @param device The device to use to create the buffer and memory from * @param data Non-zero-sized vector of data that will be used by the * tensor - * @param tensorType Type for the tensor which is of type TensorTypes + * @param tensorTypes Type for the tensor which is of type TensorTypes */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -48,10 +50,11 @@ class Tensor ~Tensor(); /** - * Initialiser which calls the initialisation for all the respective tensors - * as well as creates the respective staging tensors. The staging tensors - * would only be created for the tensors of type TensorType::eDevice as - * otherwise there is no need to copy from host memory. + * Function to trigger reinitialisation of the tensor buffer and memory with + * new data as well as new potential device type. + * + * @param data Vector of data to use to initialise vector from + * @param tensorType The type to use for the tensor */ void rebuild(const std::vector& data, TensorTypes tensorType = TensorTypes::eDevice); @@ -61,6 +64,11 @@ class Tensor */ void destroy(); + /** + * Check whether tensor is initialized based on the created gpu resources. 
+ * + * @returns Boolean stating whether tensor is initialized + */ bool isInit(); /** diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 6975f2793..018fbced5 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -17,6 +17,13 @@ class OpAlgoDispatch : public OpBase { public: + /** + * Constructor that stores the algorithm to use as well as the relevant + * push constants to override when recording. + * + * @param algorithm The algorithm object to use for dispatch + * @param pushConstants The push constants to use for override + */ OpAlgoDispatch(const std::shared_ptr& algorithm, const kp::Constants& pushConstants = {}); @@ -33,18 +40,22 @@ class OpAlgoDispatch : public OpBase * shader processing to the gpu. This function also records the GPU memory * copy of the output data for the staging buffer so it can be read by the * host. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. + * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. 
*/ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 34818fcf0..f4efb2e9b 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -32,6 +32,8 @@ class OpBase * The record function is intended to only send a record command or run * commands that are expected to record operations that are to be submitted * as a batch into the GPU. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void record(const vk::CommandBuffer& commandBuffer) = 0; @@ -42,6 +44,8 @@ class OpBase * there are situations where eval can be called multiple times, so the * resources that are created should be idempotent in case it's called multiple * times in a row. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0; @@ -52,6 +56,8 @@ class OpBase * there are situations where eval can be called multiple times, so the * resources that are destroyed should not require a re-init unless explicitly * provided by the user. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0; }; diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 992b0e8a0..5c6dec9f0 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -26,11 +26,9 @@ class OpMult : public OpAlgoDispatch * requirements for the operations to be able to create and manage their * sub-components. 
* - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param komputeWorkgroup Optional parameter to specify the layout for processing + * @param algorithm An algorithm that will be overridden with the OpMult + * shader data and the tensors provided which are expected to be 3 */ OpMult(std::vector> tensors, std::shared_ptr algorithm) : OpAlgoDispatch(algorithm) diff --git a/src/include/kompute/operations/OpTensorCopy.hpp b/src/include/kompute/operations/OpTensorCopy.hpp index 3d202031f..892528996 100644 --- a/src/include/kompute/operations/OpTensorCopy.hpp +++ b/src/include/kompute/operations/OpTensorCopy.hpp @@ -9,38 +9,47 @@ namespace kp { /** - Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type + * Operation that copies the data from the first tensor to the rest of the tensors + * provided, using a record command for all the vectors. This operation does not + * own/manage the memory of the tensors passed to it. The operation must only + * receive tensors of type */ class OpTensorCopy : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. 
* - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorCopy(const std::vector>& tensors); /** - * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. */ ~OpTensorCopy() override; /** - * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier. + * Records the copy commands from the first tensor into all the other + * tensors provided. Also optionally records a barrier. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * Copies the local vectors for all the tensors to sync the data with the gpu. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index cbb8ec40e..216ac74c9 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -8,17 +8,20 @@ namespace kp { /** - Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. 
For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. + * Operation that syncs tensor's device by mapping local data into the device memory. + * For TensorTypes::eDevice it will use a record operation for the memory to be syncd + * into GPU memory which means that the operation will be done in sync with GPU commands. + * For TensorTypes::eHost it will only map the data into host memory which will + * happen during preEval before the recorded commands are dispatched. */ class OpTensorSyncDevice : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. The tensors provided cannot + * be of type TensorTypes::eStorage. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorSyncDevice(const std::vector>& tensors); @@ -29,17 +32,24 @@ class OpTensorSyncDevice : public OpBase ~OpTensorSyncDevice() override; /** - * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. + * For device tensors, it records the copy command for the tensor to copy the + * data from its staging to device memory. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. 
+ * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp index 276f38137..fc52acc35 100644 --- a/src/include/kompute/operations/OpTensorSyncLocal.hpp +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -9,38 +9,50 @@ namespace kp { /** - Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. + * Operation that syncs tensor's local memory by mapping device data into the + * local CPU memory. For TensorTypes::eDevice it will use a record operation + * for the memory to be syncd into GPU memory which means that the operation + * will be done in sync with GPU commands. For TensorTypes::eHost it will + * only map the data into host memory which will happen during preEval before + * the recorded commands are dispatched. */ class OpTensorSyncLocal : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. 
The tensors provided + * cannot be of type TensorTypes::eStorage. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorSyncLocal(const std::vector>& tensors); /** - * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + * Default destructor. This class does not manage memory so it won't be expecting + * the parent to perform a release. */ ~OpTensorSyncLocal() override; /** - * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory. + * For device tensors, it records the copy command for the tensor to copy the + * data from its device to staging memory. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * For host tensors it performs the map command from the host memory into local memory. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override;