Added functionality for multiple device creation

2020-08-29 20:57:46 +01:00 · 2020-08-29 20:57:46 +01:00 · d4cc61817e
commit d4cc61817e
parent 6c69d832d3
9 changed files with 61 additions and 41 deletions
--- a/README.md
+++ b/README.md
@ -15,13 +15,15 @@
 <td>

 <h1>Vulkan Kompute</h1>
-<h3>The General Purpose Vulkan Compute Framework. Blazing fast, lightweight, easy to set up and optimized for advanced data processing usecases.</h3>
+<h3>The General Purpose Vulkan Compute Framework.</h3>

 </td>

 </tr>
 </table>

+<h4>Blazing fast, lightweight, easy to set up and optimized for advanced data processing use cases.</h4>
+
 🔋 [Documentation](https://axsaucedo.github.io/vulkan-kompute/) 💻 [Import to your project](https://axsaucedo.github.io/vulkan-kompute/) ⌨ [Tutorials](https://axsaucedo.github.io/vulkan-kompute/) 💾


@ -38,7 +40,7 @@

 ### Setup

-Kompute is provided as a single header file `Kompute.hpp` that can be simply included in your code.
+Kompute is provided as a single header file [`Kompute.hpp`](single_include/kompute/Kompute.hpp) that can be simply included in your code.

 You can go to our [release page]() to grab the latest library or you can [build from source]().

@ -62,7 +64,7 @@ int main() {
    mgr.evalOp<kp::OpCreateTensor>(params);

    // Run Kompute operation on the parameters provided with dispatch layout
-    mgr.evalOp<kp::OpAlgoShader<10, 1, 1>>(params, "path/to/shader.comp.spv");
+    mgr.evalOp<kp::OpAlgoShader<3, 1, 1>>(params, "path/to/shader.comp.spv");

    // Print the output
    std::cout << fmt::format("Output: {}", tensorOutput.data()) << std::endl;
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -214,7 +214,7 @@ class Tensor
    ~Tensor();

    /**
-     * Initialiser creates the buffer and GPU memory.
+     * Initialiser which calls the initialisation for all the respective tensors and creates the respective staging tensors. The staging tensors would only be created for tensors of type TensorTypes::eDevice, as otherwise there is no need to copy from host memory.
     */
    void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
              std::shared_ptr<vk::Device> device,
@ -383,6 +383,7 @@ class OpBase
        this->mDevice = device;
        this->mCommandBuffer = commandBuffer;
        this->mTensors = tensors;
+        this->mFreeTensors = freeTensors;
    }

    /**
@ -1105,7 +1106,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
    OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors);
+           std::vector<std::shared_ptr<Tensor>> tensors);

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -1166,7 +1167,7 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
 OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                           std::shared_ptr<vk::Device> device,
                           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                           std::vector<std::shared_ptr<Tensor>>& tensors)
+                           std::vector<std::shared_ptr<Tensor>> tensors)
  // The inheritance is initialised with the copyOutputData to false given that
  // this depencendant class handles the transfer of data via staging buffers in 
  // a granular way.
@ -1318,7 +1319,7 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
    OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors)
+           std::vector<std::shared_ptr<Tensor>> tensors)
      : OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors, true)
    {
        SPDLOG_DEBUG("Kompute OpMult constructor with params");
@ -1383,13 +1384,13 @@ class OpCreateTensor : public OpBase
     * @param physicalDevice Vulkan physical device used to find device queues
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
-     * @param tensors Tensors that are to be used in this operation
+     * @param tensors Tensors that will be initialised by this operation; they must not already be initialised.
     * @param freeTensors Whether operation manages the memory of the Tensors
     */
    OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device,
                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                   std::vector<std::shared_ptr<Tensor>>& tensors);
+                   std::vector<std::shared_ptr<Tensor>> tensors);

    /**
     * Default destructor which in this case expects the parent class to free
@ -1418,8 +1419,7 @@ class OpCreateTensor : public OpBase

  private:
    // Never owned resources
-    std::shared_ptr<Tensor> mPrimaryTensor;
-    std::shared_ptr<Tensor> mStagingTensor;
+    std::vector<std::shared_ptr<Tensor>> mStagingTensors;
 };

 } // End namespace kp
--- a/src/OpCreateTensor.cpp
+++ b/src/OpCreateTensor.cpp
@ -14,7 +14,7 @@ OpCreateTensor::OpCreateTensor(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
-  std::vector<std::shared_ptr<Tensor>>& tensors)
+  std::vector<std::shared_ptr<Tensor>> tensors)
  : OpBase(physicalDevice, device, commandBuffer, tensors, true)
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params");
@ -23,6 +23,13 @@ OpCreateTensor::OpCreateTensor(
 OpCreateTensor::~OpCreateTensor()
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor destructor started");
+
+    SPDLOG_DEBUG("Kompute OpCreateTensor destroying staging tensors");
+    for (size_t i = 0; i < this->mStagingTensors.size(); i++) {
+        if (this->mStagingTensors[i]) {
+            this->mStagingTensors[i]->freeMemoryDestroyGPUResources();
+        }
+    }
 }

 void
@ -33,30 +40,35 @@ OpCreateTensor::init()
    if (this->mTensors.size() < 1) {
        throw std::runtime_error(
          "Kompute OpCreateTensor called with less than 1 tensor");
-    } else if (this->mTensors.size() > 1) {
-        spdlog::warn("Kompute OpCreateTensor called with more than 1 tensor");
    }

-    this->mPrimaryTensor = this->mTensors[0];
+    for (std::shared_ptr<Tensor> tensor: this->mTensors) {
+        if (tensor->isInit()) {
+            throw std::runtime_error("Kompute OpCreateTensor: Tensor has already been initialized");
+        }
+        if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
+            tensor->init(
+              this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);

-    if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
-        this->mPrimaryTensor->init(
-          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
+            std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
+              tensor->data(), Tensor::TensorTypes::eStaging);

-        this->mStagingTensor = std::make_shared<Tensor>(
-          this->mPrimaryTensor->data(), Tensor::TensorTypes::eStaging);
+            stagingTensor->init(
+              this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);

-        this->mStagingTensor->init(
-          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
+            stagingTensor->mapDataIntoHostMemory();

-        this->mStagingTensor->mapDataIntoHostMemory();
+            this->mStagingTensors.push_back(stagingTensor);

-        // Adding to the OpBase owned resource so they are freed
-        this->mTensors.push_back(this->mStagingTensor);
+        } else {

-    } else {
-        this->mPrimaryTensor->init(
-          this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
+            tensor->init(
+              this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
+
+            // No staging tensor is needed here, so push a nullptr placeholder to
+            // keep mStagingTensors index-aligned one-to-one with mTensors.
+            this->mStagingTensors.push_back(nullptr);
+        }
    }
 }

@ -65,8 +77,10 @@ OpCreateTensor::record()
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor record called");

-    if (this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) {
-        this->mPrimaryTensor->recordCopyFrom(this->mStagingTensor, true);
+    for (size_t i = 0; i < this->mTensors.size(); i++) {
+        if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
+            this->mTensors[i]->recordCopyFrom(this->mStagingTensors[i], false);
+        }
    }
 }

@ -75,9 +89,13 @@ OpCreateTensor::postSubmit()
 {
    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");

-    this->mStagingTensor->mapDataFromHostMemory();
+    for (size_t i = 0; i < this->mTensors.size(); i++) {
+        if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
+            this->mStagingTensors[i]->mapDataFromHostMemory();

-    this->mPrimaryTensor->setData(this->mStagingTensor->data());
+            this->mTensors[i]->setData(this->mStagingTensors[i]->data());
+        }
+    }
 }

 }
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -25,7 +25,7 @@ Tensor::Tensor(std::vector<uint32_t> data, TensorTypes tensorType)

 Tensor::~Tensor()
 {
-    SPDLOG_DEBUG("Kompute Tensor destructor started");
+    SPDLOG_DEBUG("Kompute Tensor destructor started. Type: {}", this->tensorType());

    if (this->isInit()) {
        this->freeMemoryDestroyGPUResources();
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@ -52,7 +52,7 @@ class Tensor
    ~Tensor();

    /**
-     * Initialiser creates the buffer and GPU memory.
+     * Initialiser which calls the initialisation for all the respective tensors and creates the respective staging tensors. The staging tensors would only be created for tensors of type TensorTypes::eDevice, as otherwise there is no need to copy from host memory.
     */
    void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
              std::shared_ptr<vk::Device> device,
--- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
+++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
@ -42,7 +42,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
    OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors);
+           std::vector<std::shared_ptr<Tensor>> tensors);

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -103,7 +103,7 @@ template<uint32_t tX, uint32_t tY, uint32_t tZ>
 OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                           std::shared_ptr<vk::Device> device,
                           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                           std::vector<std::shared_ptr<Tensor>>& tensors)
+                           std::vector<std::shared_ptr<Tensor>> tensors)
  // The inheritance is initialised with the copyOutputData to false given that
  // this depencendant class handles the transfer of data via staging buffers in 
  // a granular way.
--- a/src/include/kompute/operations/OpBase.hpp
+++ b/src/include/kompute/operations/OpBase.hpp
@ -45,6 +45,7 @@ class OpBase
        this->mDevice = device;
        this->mCommandBuffer = commandBuffer;
        this->mTensors = tensors;
+        this->mFreeTensors = freeTensors;
    }

    /**
--- a/src/include/kompute/operations/OpCreateTensor.hpp
+++ b/src/include/kompute/operations/OpCreateTensor.hpp
@ -25,13 +25,13 @@ class OpCreateTensor : public OpBase
     * @param physicalDevice Vulkan physical device used to find device queues
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
-     * @param tensors Tensors that are to be used in this operation
+     * @param tensors Tensors that will be initialised by this operation; they must not already be initialised.
     * @param freeTensors Whether operation manages the memory of the Tensors
     */
    OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device,
                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                   std::vector<std::shared_ptr<Tensor>>& tensors);
+                   std::vector<std::shared_ptr<Tensor>> tensors);

    /**
     * Default destructor which in this case expects the parent class to free
@ -60,8 +60,7 @@ class OpCreateTensor : public OpBase

  private:
    // Never owned resources
-    std::shared_ptr<Tensor> mPrimaryTensor;
-    std::shared_ptr<Tensor> mStagingTensor;
+    std::vector<std::shared_ptr<Tensor>> mStagingTensors;
 };

 } // End namespace kp
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@ -46,7 +46,7 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
    OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors)
+           std::vector<std::shared_ptr<Tensor>> tensors)
      : OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors, true)
    {
        SPDLOG_DEBUG("Kompute OpMult constructor with params");