diff --git a/README.md b/README.md index e1c19f185..c8650b4e1 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,15 @@

Vulkan Kompute

-

The General Purpose Vulkan Compute Framework. Blazing fast, lightweight, easy to set up and optimized for advanced data processing usecases.

+

The General Purpose Vulkan Compute Framework.

+

Blazing fast, lightweight, easy to set up and optimized for advanced data processing use cases.

+ 🔋 [Documentation](https://axsaucedo.github.io/vulkan-kompute/) 💻 [Import to your project](https://axsaucedo.github.io/vulkan-kompute/) ⌨ [Tutorials](https://axsaucedo.github.io/vulkan-kompute/) 💾 @@ -38,7 +40,7 @@ ### Setup -Kompute is provided as a single header file `Kompute.hpp` that can be simply included in your code. +Kompute is provided as a single header file [`Kompute.hpp`](single_include/kompute/Kompute.hpp) that can be simply included in your code. You can go to our [release page]() to grab the latest library or you can [build from source](). @@ -62,7 +64,7 @@ int main() { mgr.evalOp(params); // Run Kompute operation on the parameters provided with dispatch layout - mgr.evalOp>(params, "path/to/shader.comp.spv"); + mgr.evalOp>(params, "path/to/shader.comp.spv"); // Print the output std::cout << fmt::format("Output: {}", tensorOutput.data()) << std::endl; diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 204fd3823..e9aa48848 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -214,7 +214,7 @@ class Tensor ~Tensor(); /** - * Initialiser creates the buffer and GPU memory. + * Initialiser which calls the initialisation for all the respective tensors as well as creates the respective staging tensors. The staging tensors would only be created for the tensors of type TensorTypes::eDevice as otherwise there is no need to copy from host memory. 
 */ void init(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -383,6 +383,7 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; + this->mFreeTensors = freeTensors; } /** @@ -1105,7 +1106,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector> tensors); /** * Default destructor, which is in charge of destroying the algorithm @@ -1166,7 +1167,7 @@ template OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors) + std::vector> tensors) // The inheritance is initialised with the copyOutputData to false given that // this depencendant class handles the transfer of data via staging buffers in // a granular way. @@ -1318,7 +1319,7 @@ class OpMult : public OpAlgoBase OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors) + std::vector> tensors) : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, true) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -1383,13 +1384,13 @@ class OpCreateTensor : public OpBase * @param physicalDevice Vulkan physical device used to find device queues * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation + * @param tensors Tensors that will be initialised by this operation. 
* @param freeTensors Whether operation manages the memory of the Tensors */ OpCreateTensor(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector> tensors); /** * Default destructor which in this case expects the parent class to free @@ -1418,8 +1419,7 @@ class OpCreateTensor : public OpBase private: // Never owned resources - std::shared_ptr mPrimaryTensor; - std::shared_ptr mStagingTensor; + std::vector> mStagingTensors; }; } // End namespace kp diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp index 266f57479..f99a81ba5 100644 --- a/src/OpCreateTensor.cpp +++ b/src/OpCreateTensor.cpp @@ -14,7 +14,7 @@ OpCreateTensor::OpCreateTensor( std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors) + std::vector> tensors) : OpBase(physicalDevice, device, commandBuffer, tensors, true) { SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params"); @@ -23,6 +23,13 @@ OpCreateTensor::OpCreateTensor( OpCreateTensor::~OpCreateTensor() { SPDLOG_DEBUG("Kompute OpCreateTensor destructor started"); + + SPDLOG_DEBUG("Kompute OpCreateTensor destroying staging tensors"); + for (size_t i = 0; i < this->mStagingTensors.size(); i++) { + if (this->mStagingTensors[i]) { + this->mStagingTensors[i]->freeMemoryDestroyGPUResources(); + } + } } void @@ -33,30 +40,35 @@ OpCreateTensor::init() if (this->mTensors.size() < 1) { throw std::runtime_error( "Kompute OpCreateTensor called with less than 1 tensor"); - } else if (this->mTensors.size() > 1) { - spdlog::warn("Kompute OpCreateTensor called with more than 1 tensor"); } - this->mPrimaryTensor = this->mTensors[0]; + for (std::shared_ptr tensor: this->mTensors) { + if (tensor->isInit()) { + throw std::runtime_error("Kompute OpCreateTensor: Tensor has already been initialized"); + } + if (tensor->tensorType() == Tensor::TensorTypes::eDevice) { + tensor->init( + this->mPhysicalDevice, this->mDevice, 
this->mCommandBuffer); - if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) { - this->mPrimaryTensor->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + std::shared_ptr stagingTensor = std::make_shared( + tensor->data(), Tensor::TensorTypes::eStaging); - this->mStagingTensor = std::make_shared( - this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging); + stagingTensor->init( + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - this->mStagingTensor->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + stagingTensor->mapDataIntoHostMemory(); - this->mStagingTensor->mapDataIntoHostMemory(); + this->mStagingTensors.push_back(stagingTensor); - // Adding to the OpBase owned resource so they are freed - this->mTensors.push_back(this->mStagingTensor); + } else { - } else { - this->mPrimaryTensor->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + tensor->init( + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + + // We push a nullptr when no staging tensor is needed to match + // index number in array to have one to one mapping with tensors + this->mStagingTensors.push_back(nullptr); + } } } @@ -65,8 +77,10 @@ OpCreateTensor::record() { SPDLOG_DEBUG("Kompute OpCreateTensor record called"); - if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) { - this->mPrimaryTensor->recordCopyFrom(this->mStagingTensor, true); + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + this->mTensors[i]->recordCopyFrom(this->mStagingTensors[i], false); + } } } @@ -75,9 +89,13 @@ OpCreateTensor::postSubmit() { SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); - this->mStagingTensor->mapDataFromHostMemory(); + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + this->mStagingTensors[i]->mapDataFromHostMemory(); - 
this->mPrimaryTensor->setData(this->mStagingTensor->data()); + this->mTensors[i]->setData(this->mStagingTensors[i]->data()); + } + } } } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 882f11630..36871ea7a 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -25,7 +25,7 @@ Tensor::Tensor(std::vector data, TensorTypes tensorType) Tensor::~Tensor() { - SPDLOG_DEBUG("Kompute Tensor destructor started"); + SPDLOG_DEBUG("Kompute Tensor destructor started. Type: {}", this->tensorType()); if (this->isInit()) { this->freeMemoryDestroyGPUResources(); diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index e5cdd6932..d5649f17b 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -52,7 +52,7 @@ class Tensor ~Tensor(); /** - * Initialiser creates the buffer and GPU memory. + * Initialiser which calls the initialisation for all the respective tensors as well as creates the respective staging tensors. The staging tensors would only be created for the tensors of type TensorTypes::eDevice as otherwise there is no need to copy from host memory. 
*/ void init(std::shared_ptr physicalDevice, std::shared_ptr device, diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp index dca11eb0c..2480ea6e4 100644 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -42,7 +42,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector> tensors); /** * Default destructor, which is in charge of destroying the algorithm @@ -103,7 +103,7 @@ template OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors) + std::vector> tensors) // The inheritance is initialised with the copyOutputData to false given that // this depencendant class handles the transfer of data via staging buffers in // a granular way. diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 41d8f50f3..c8c1af432 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -45,6 +45,7 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; + this->mFreeTensors = freeTensors; } /** diff --git a/src/include/kompute/operations/OpCreateTensor.hpp b/src/include/kompute/operations/OpCreateTensor.hpp index e7f7320af..f08bef14c 100644 --- a/src/include/kompute/operations/OpCreateTensor.hpp +++ b/src/include/kompute/operations/OpCreateTensor.hpp @@ -25,13 +25,13 @@ class OpCreateTensor : public OpBase * @param physicalDevice Vulkan physical device used to find device queues * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that are to be used in this operation + * @param tensors 
Tensors that will be initialised by this operation. * @param freeTensors Whether operation manages the memory of the Tensors */ OpCreateTensor(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector> tensors); /** * Default destructor which in this case expects the parent class to free @@ -60,8 +60,7 @@ class OpCreateTensor : public OpBase private: // Never owned resources - std::shared_ptr mPrimaryTensor; - std::shared_ptr mStagingTensor; + std::vector> mStagingTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index f2b62da91..45a63f54c 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -46,7 +46,7 @@ class OpMult : public OpAlgoBase OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors) + std::vector> tensors) : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, true) { SPDLOG_DEBUG("Kompute OpMult constructor with params");