From d9789b63d2c2b9cc4d574cba41bbb01c94f9e09e Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 08:41:31 +0000
Subject: [PATCH 1/7] Added initial destroy and updated rebuild functions

---
 src/Manager.cpp                 |  23 ++++++
 src/include/kompute/Manager.hpp | 137 +++++++++++++++++++++++++++-----
 2 files changed, 141 insertions(+), 19 deletions(-)
diff --git a/src/Manager.cpp b/src/Manager.cpp
index e7bb88f2b..00e2a6e67 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -328,4 +328,27 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
     SPDLOG_DEBUG("Kompute Manager compute queue obtained");
 }
 
+std::shared_ptr<Tensor>
+Manager::tensor(
+  const std::vector<float>& data,
+  Tensor::TensorTypes tensorType,
+  bool syncDataToGPU)
+{
+    SPDLOG_DEBUG("Kompute Manager tensor triggered");
+
+    SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
+    // Forward the arguments so the Tensor is built in place instead of
+    // copy-constructing it from a temporary kp::Tensor
+    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>(data, tensorType);
+    tensor->init(this->mPhysicalDevice, this->mDevice);
+
+    if (syncDataToGPU) {
+        this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+    }
+    this->mManagedTensors.insert(tensor);
+
+    return tensor;
+}
+
+
 }
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index 5ef32ff65..3019569c5 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -226,23 +226,7 @@ class Manager
     std::shared_ptr<Tensor> tensor(
       const std::vector<float>& data,
       Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
-      bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager tensor triggered");
-
-        SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
-        std::shared_ptr<Tensor> tensor =
-          std::make_shared<Tensor>(kp::Tensor(data, tensorType));
-
-        tensor->init(this->mPhysicalDevice, this->mDevice);
-
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
-        }
-        this->mManagedTensors.insert(tensor);
-
-        return tensor;
-    }
+      bool syncDataToGPU = true);
 
     /**
      * Function that simplifies the common workflow of tensor initialisation. It
@@ -252,7 +236,6 @@ class Manager
      *
      * @param tensors Array of tensors to rebuild
      * @param syncDataToGPU Whether to sync the data to GPU memory
-     * @returns Initialized Tensor with memory Syncd to GPU device
      */
     void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
                         bool syncDataToGPU = true)
@@ -277,7 +260,6 @@ class Manager
      *
      * @param tensors Single tensor to rebuild
      * @param syncDataToGPU Whether to sync the data to GPU memory
-     * @returns Initialized Tensor with memory Syncd to GPU device
      */
     void rebuild(std::shared_ptr<kp::Tensor> tensor,
                         bool syncDataToGPU = true)
@@ -301,6 +283,123 @@ class Manager
         }
     }
 
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * single tensor.
+     *
+     * @param tensors Single tensor to rebuild
+     */
+    void destroy(std::shared_ptr<kp::Tensor> tensor)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+
+        if (tensor->isInit()) {
+            tensor->freeMemoryDestroyGPUResources();
+        }
+
+        // TODO: Confirm not limiting destroying tensors owned by this manager allowed
+        std::set<std::shared_ptr<Tensor>>::iterator it =
+          this->mManagedTensors.find(tensor);
+
+        if (it != this->mManagedTensors.end()) {
+            this->mManagedTensors.erase(tensor);
+        }
+    }
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * vector of tensors.
+     *
+     * @param tensors Single tensor to rebuild
+     */
+    void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+
+        for (std::shared_ptr<Tensor> tensor : tensors) {
+            this->destroy(tensor);
+        }
+    }
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * vector of sequences. Destroying by sequence name is more efficient
+     * and hence recommended instead of by object.
+     *
+     * @param sequences Vector for shared ptrs with sequences to destroy
+     */
+    void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
+
+        for (std::shared_ptr<kp::Sequence> sequence : sequences) {
+            this->destroy(sequence);
+        }
+    }
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * single sequence. Destroying by sequence name is more efficient
+     * and hence recommended instead of by object.
+     *
+     * @param sequences Single sequence to rebuild
+     */
+    void destroy(std::shared_ptr<kp::Sequence> sequence)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
+
+        // Inefficient but required to delete by value
+        // Depending on the amount of named sequences created may be worth creating
+        // a set to ensure efficient delete.
+        for (std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); it++) {
+            if (it->second == sequence) {
+                this->mManagedSequences.erase(it);
+                break;
+            }
+        }
+
+        if (sequence->isInit()) {
+            sequence->freeMemoryDestroyGPUResources();
+        }
+    }
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * sequence by name.
+     *
+     * @param sequenceName Single name of named sequence to destroy
+     */
+    void destroy(const std::string& sequenceName)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
+
+        std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
+          found = this->mManagedSequences.find(sequenceName);
+
+        if (found != this->mManagedSequences.end()) {
+            // We don't call destroy(sequence) as erasing sequence by name more efficient
+            if (found->second->isInit()) {
+                found->second->freeMemoryDestroyGPUResources();
+            }
+            this->mManagedSequences.erase(sequenceName);
+        }
+    }
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * sequences using vector of named sequence names.
+     *
+     * @param sequenceName Vector of sequence names to destroy
+     */
+    void destroy(const std::vector<std::string>& sequenceNames)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
+
+        for (const std::string& sequenceName : sequenceNames) {
+            this->destroy(sequenceName);
+        }
+    }
+
   private:
     // -------------- OPTIONALLY OWNED RESOURCES
     std::shared_ptr<vk::Instance> mInstance = nullptr;

From 605a9d1d4a602804398a6fd2cb3fe34d6d322cd9 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 08:41:38 +0000
Subject: [PATCH 2/7] Added destroy functions

---
 test/TestDestroy.cpp | 370 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 370 insertions(+)
 create mode 100644 test/TestDestroy.cpp

diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
new file mode 100644
index 000000000..a7b46a078
--- /dev/null
+++ b/test/TestDestroy.cpp
@@ -0,0 +1,370 @@
+
+#include "gtest/gtest.h"
+
+#include "kompute/Kompute.hpp"
+
+TEST(TestDestroy, TestDestroyTensorSingle)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+        // sq is declared outside the inner block that owns the manager
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA });
+
+            sq = mgr.sequence();
+
+            sq->begin();
+            sq->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq->end();
+
+            sq->eval();
+            // NOTE(review): presumably copies the GPU result into tensorA's host data - confirm
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy one tensor via the manager; it must then report !isInit()
+            mgr.destroy(tensorA);
+
+            EXPECT_FALSE(tensorA->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+}
+
+TEST(TestDestroy, TestDestroyTensorVector)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
+    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
+    // GLSL compute source: adds 1 to buffer at binding 0 and 2 to buffer at binding 1
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      layout(set = 0, binding = 1) buffer b { float pb[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+          pb[index] = pb[index] + 2;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+        // sq is declared outside the inner block that owns the manager
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA, tensorB });
+
+            sq = mgr.sequence();
+
+            sq->begin();
+            sq->record<kp::OpAlgoBase>(
+              { tensorA, tensorB }, std::vector<char>(shader.begin(), shader.end()));
+            sq->end();
+
+            sq->eval();
+            // NOTE(review): presumably copies the GPU results into the tensors' host data - confirm
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
+            // Destroy both tensors with the vector overload; both must report !isInit()
+            mgr.destroy({ tensorA, tensorB });
+
+            EXPECT_FALSE(tensorA->isInit());
+            EXPECT_FALSE(tensorB->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
+    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
+}
+
+TEST(TestDestroy, TestDestroyTensorVectorUninitialised)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
+    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
+
+    {
+        // No sequence is recorded here: the tensors are only rebuilt and
+        // destroyed, so their host data must stay at the initial values.
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA, tensorB });
+
+            mgr.destroy({ tensorA, tensorB });
+
+            EXPECT_FALSE(tensorA->isInit());
+            EXPECT_FALSE(tensorB->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+    EXPECT_EQ(tensorB->data(), std::vector<float>({ 1, 1, 1 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceSingle)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+        // sq is declared outside the inner block that owns the manager
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA });
+
+            sq = mgr.sequence();
+
+            sq->begin();
+            sq->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq->end();
+
+            sq->eval();
+            // NOTE(review): presumably copies the GPU result into tensorA's host data - confirm
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy the sequence by object; it must then report !isInit()
+            mgr.destroy(sq);
+
+            EXPECT_FALSE(sq->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceVector)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq1 = nullptr;
+        std::shared_ptr<kp::Sequence> sq2 = nullptr;
+        // Both sequence handles are declared outside the block that owns the manager
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA });
+            // First named sequence: records and runs the shader once
+            sq1 = mgr.sequence("One");
+            sq1->begin();
+            sq1->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq1->end();
+            sq1->eval();
+            // Second named sequence: runs the same shader again on tensorA
+            sq2 = mgr.sequence("Two");
+            sq2->begin();
+            sq2->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq2->end();
+            sq2->eval();
+            // NOTE(review): presumably copies the GPU result into tensorA's host data - confirm
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy both sequences by object with the vector overload
+            mgr.destroy({ sq1, sq2 });
+
+            EXPECT_FALSE(sq1->isInit());
+            EXPECT_FALSE(sq2->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        kp::Manager mgr;
+        {
+            mgr.rebuild({ tensorA });
+            // NOTE(review): presumably runs the op in a managed sequence named "one" - confirm
+            mgr.evalOp<kp::OpAlgoBase>(
+              { tensorA }, "one",
+              std::vector<char>(shader.begin(), shader.end()));
+            // Same shader again under the name "two"
+            mgr.evalOp<kp::OpAlgoBase>(
+              { tensorA }, "two",
+              std::vector<char>(shader.begin(), shader.end()));
+
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy each sequence by name; the test never holds a sequence handle
+            mgr.destroy("one");
+            mgr.destroy("two");
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq1 = nullptr;
+        // sq1 is declared outside the inner block that owns the manager
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA });
+
+            sq1 = mgr.sequence("One");
+            sq1->begin();
+            sq1->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq1->end();
+            sq1->eval();
+
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy by name; the handle held by the test must then report !isInit()
+            mgr.destroy("One");
+
+            EXPECT_FALSE(sq1->isInit());
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        kp::Manager mgr;
+        {
+            mgr.rebuild({ tensorA });
+            // NOTE(review): presumably runs the op in a managed sequence named "one" - confirm
+            mgr.evalOp<kp::OpAlgoBase>(
+              { tensorA }, "one",
+              std::vector<char>(shader.begin(), shader.end()));
+            // Same shader again under the name "two"
+            mgr.evalOp<kp::OpAlgoBase>(
+              { tensorA }, "two",
+              std::vector<char>(shader.begin(), shader.end()));
+
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // Destroy both sequences in one call via the vector-of-names overload
+            mgr.destroy(std::vector<std::string>({"one", "two"}));
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+    // Unlike the InsideManager variant, hold the sequences outside mgr's scope
+    std::shared_ptr<kp::Sequence> sq1 = nullptr;
+    std::shared_ptr<kp::Sequence> sq2 = nullptr;
+    {
+        kp::Manager mgr;
+        mgr.rebuild({ tensorA });
+        sq1 = mgr.sequence("one");
+        sq2 = mgr.sequence("two");
+        for (std::shared_ptr<kp::Sequence> sq : { sq1, sq2 }) {
+            sq->begin();
+            sq->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq->end();
+            sq->eval();
+        }
+        mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+        mgr.destroy(std::vector<std::string>({"one", "two"}));
+        EXPECT_FALSE(sq1->isInit());
+        EXPECT_FALSE(sq2->isInit());
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
+}
+
+TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    // GLSL compute source: adds 1 to every element of the buffer at binding 0
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        kp::Manager mgr;
+        {
+            mgr.rebuild({ tensorA });
+            // Run the shader once through the manager's default evaluation path
+            mgr.evalOpDefault<kp::OpAlgoBase>(
+              { tensorA },
+              std::vector<char>(shader.begin(), shader.end()));
+
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+            // NOTE(review): KP_DEFAULT_SESSION is presumably the name evalOpDefault uses - confirm
+            mgr.destroy(KP_DEFAULT_SESSION);
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+}

From c83ba445495567440962e0c01cfc82411939917c Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 08:44:25 +0000
Subject: [PATCH 3/7] Moved destroy and rebuild functions to cpp

---
 src/Manager.cpp                 | 124 ++++++++++++++++++++++++++++++++
 src/include/kompute/Manager.hpp | 114 +++--------------------------
 2 files changed, 132 insertions(+), 106 deletions(-)

diff --git a/src/Manager.cpp b/src/Manager.cpp
index 00e2a6e67..c66a4030b 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -350,5 +350,129 @@ Manager::tensor(
     return tensor;
 }
 
+void
+Manager::rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
+                    bool syncDataToGPU)
+{
+    SPDLOG_DEBUG("Kompute Manager rebuild triggered");
+    // Rebuild each tensor unsynced so one batched sync can follow
+    for (const std::shared_ptr<Tensor>& current : tensors) {
+        this->rebuild(current, false);
+    }
+
+    if (!syncDataToGPU) {
+        return;
+    }
+    this->evalOpDefault<OpTensorSyncDevice>(tensors);
+}
+
+void
+Manager::rebuild(std::shared_ptr<kp::Tensor> tensor,
+                    bool syncDataToGPU)
+{
+    SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+
+    // Drop any existing GPU allocation before re-initialising
+    const bool wasInitialised = tensor->isInit();
+    if (wasInitialised) {
+        tensor->freeMemoryDestroyGPUResources();
+    }
+    tensor->init(this->mPhysicalDevice, this->mDevice);
+
+    // std::set::insert leaves the set untouched when the tensor is
+    // already managed, so no explicit lookup is needed first
+    this->mManagedTensors.insert(tensor);
+
+    if (!syncDataToGPU) {
+        return;
+    }
+    this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+}
+
+void
+Manager::destroy(std::shared_ptr<kp::Tensor> tensor)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Tensor triggered");
+
+    // Only tensors that report isInit() have GPU resources to release
+    if (tensor->isInit()) {
+        tensor->freeMemoryDestroyGPUResources();
+    }
+
+    // TODO: Confirm not limiting destroying tensors owned by this manager allowed
+    std::set<std::shared_ptr<Tensor>>::iterator it =
+      this->mManagedTensors.find(tensor);
+    if (it != this->mManagedTensors.end()) {
+        this->mManagedTensors.erase(it); // reuse iterator, no second lookup
+    }
+}
+
+void
+Manager::destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Tensor triggered");
+    // Delegate to the single-tensor overload for every entry
+    for (const std::shared_ptr<Tensor>& tensor : tensors) {
+        this->destroy(tensor);
+    }
+}
+
+void
+Manager::destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");
+    // Delegate to the single-sequence overload for every entry
+    for (const std::shared_ptr<kp::Sequence>& sequence : sequences) {
+        this->destroy(sequence);
+    }
+}
+
+void
+Manager::destroy(std::shared_ptr<kp::Sequence> sequence)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");
+
+    // Inefficient but required to delete by value
+    // Depending on the amount of named sequences created may be worth creating
+    // a set to ensure efficient delete.
+    for (auto it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); ++it) {
+        if (it->second == sequence) {
+            this->mManagedSequences.erase(it);
+            break; // `it` is invalid after erase; only the first match is removed
+        }
+    }
+
+    if (sequence->isInit()) {
+        sequence->freeMemoryDestroyGPUResources();
+    }
+}
+
+void
+Manager::destroy(const std::string& sequenceName)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");
+    // Names that are not in mManagedSequences are ignored
+    std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
+      found = this->mManagedSequences.find(sequenceName);
+
+    if (found != this->mManagedSequences.end()) {
+        // We don't call destroy(sequence) as erasing sequence by name more efficient
+        if (found->second->isInit()) {
+            found->second->freeMemoryDestroyGPUResources();
+        }
+        this->mManagedSequences.erase(found); // iterator erase: name is not hashed again
+    }
+}
+
+void
+Manager::destroy(const std::vector<std::string>& sequenceNames)
+{
+    SPDLOG_DEBUG("Kompute Manager destroy Sequence triggered");
+    // Delegate to the single-name overload for every entry
+    for (const std::string& sequenceName : sequenceNames) {
+        this->destroy(sequenceName);
+    }
+}
+
 
 }
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index 3019569c5..f13a45523 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -238,19 +238,7 @@ class Manager
      * @param syncDataToGPU Whether to sync the data to GPU memory
      */
     void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
-                        bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild triggered");
-        for (std::shared_ptr<Tensor> tensor : tensors) {
-
-            // False syncData to run all tensors at once instead one by one
-            this->rebuild(tensor, false);
-        }
-
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>(tensors);
-        }
-    }
+                        bool syncDataToGPU = true);
 
     /**
      * Function that simplifies the common workflow of tensor initialisation. It
@@ -262,26 +250,7 @@ class Manager
      * @param syncDataToGPU Whether to sync the data to GPU memory
      */
     void rebuild(std::shared_ptr<kp::Tensor> tensor,
-                        bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
-
-        if (tensor->isInit()) {
-            tensor->freeMemoryDestroyGPUResources();
-        }
-
-        tensor->init(this->mPhysicalDevice, this->mDevice);
-
-        std::set<std::shared_ptr<Tensor>>::iterator it =
-          this->mManagedTensors.find(tensor);
-        if (it == this->mManagedTensors.end()) {
-            this->mManagedTensors.insert(tensor);
-        }
-
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
-        }
-    }
+                        bool syncDataToGPU = true);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -289,22 +258,7 @@ class Manager
      *
      * @param tensors Single tensor to rebuild
      */
-    void destroy(std::shared_ptr<kp::Tensor> tensor)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
-
-        if (tensor->isInit()) {
-            tensor->freeMemoryDestroyGPUResources();
-        }
-
-        // TODO: Confirm not limiting destroying tensors owned by this manager allowed
-        std::set<std::shared_ptr<Tensor>>::iterator it =
-          this->mManagedTensors.find(tensor);
-
-        if (it != this->mManagedTensors.end()) {
-            this->mManagedTensors.erase(tensor);
-        }
-    }
+    void destroy(std::shared_ptr<kp::Tensor> tensor);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -312,14 +266,7 @@ class Manager
      *
      * @param tensors Single tensor to rebuild
      */
-    void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
-
-        for (std::shared_ptr<Tensor> tensor : tensors) {
-            this->destroy(tensor);
-        }
-    }
+    void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -328,14 +275,7 @@ class Manager
      *
      * @param sequences Vector for shared ptrs with sequences to destroy
      */
-    void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
-
-        for (std::shared_ptr<kp::Sequence> sequence : sequences) {
-            this->destroy(sequence);
-        }
-    }
+    void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -344,24 +284,7 @@ class Manager
      *
      * @param sequences Single sequence to rebuild
      */
-    void destroy(std::shared_ptr<kp::Sequence> sequence)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
-
-        // Inefficient but required to delete by value
-        // Depending on the amount of named sequences created may be worth creating
-        // a set to ensure efficient delete.
-        for (std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator it = this->mManagedSequences.begin(); it != this->mManagedSequences.end(); it++) {
-            if (it->second == sequence) {
-                this->mManagedSequences.erase(it);
-                break;
-            }
-        }
-
-        if (sequence->isInit()) {
-            sequence->freeMemoryDestroyGPUResources();
-        }
-    }
+    void destroy(std::shared_ptr<kp::Sequence> sequence);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -369,21 +292,7 @@ class Manager
      *
      * @param sequenceName Single name of named sequence to destroy
      */
-    void destroy(const std::string& sequenceName)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
-
-        std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
-          found = this->mManagedSequences.find(sequenceName);
-
-        if (found != this->mManagedSequences.end()) {
-            // We don't call destroy(sequence) as erasing sequence by name more efficient
-            if (found->second->isInit()) {
-                found->second->freeMemoryDestroyGPUResources();
-            }
-            this->mManagedSequences.erase(sequenceName);
-        }
-    }
+    void destroy(const std::string& sequenceName);
 
     /**
      * Destroy owned Vulkan GPU resources and free GPU memory for
@@ -391,14 +300,7 @@ class Manager
      *
      * @param sequenceName Vector of sequence names to destroy
      */
-    void destroy(const std::vector<std::string>& sequenceNames)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Sequence triggered");
-
-        for (const std::string& sequenceName : sequenceNames) {
-            this->destroy(sequenceName);
-        }
-    }
+    void destroy(const std::vector<std::string>& sequenceNames);
 
   private:
     // -------------- OPTIONALLY OWNED RESOURCES

From b386113849adc9538c9b9c3917a9a8f30d009c41 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 08:44:33 +0000
Subject: [PATCH 4/7] Updated single include

---
 single_include/kompute/Kompute.hpp | 97 +++++++++++++++---------------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 772397a26..21ebae5b8 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1513,23 +1513,7 @@ class Manager
     std::shared_ptr<Tensor> tensor(
       const std::vector<float>& data,
       Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
-      bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager tensor triggered");
-
-        SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
-        std::shared_ptr<Tensor> tensor =
-          std::make_shared<Tensor>(kp::Tensor(data, tensorType));
-
-        tensor->init(this->mPhysicalDevice, this->mDevice);
-
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
-        }
-        this->mManagedTensors.insert(tensor);
-
-        return tensor;
-    }
+      bool syncDataToGPU = true);
 
     /**
      * Function that simplifies the common workflow of tensor initialisation. It
@@ -1539,22 +1523,9 @@ class Manager
      *
      * @param tensors Array of tensors to rebuild
      * @param syncDataToGPU Whether to sync the data to GPU memory
-     * @returns Initialized Tensor with memory Syncd to GPU device
      */
     void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
-                        bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild triggered");
-        for (std::shared_ptr<Tensor> tensor : tensors) {
-
-            // False syncData to run all tensors at once instead one by one
-            this->rebuild(tensor, false);
-        }
-
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>(tensors);
-        }
-    }
+                        bool syncDataToGPU = true);
 
     /**
      * Function that simplifies the common workflow of tensor initialisation. It
@@ -1564,29 +1535,59 @@ class Manager
      *
      * @param tensors Single tensor to rebuild
      * @param syncDataToGPU Whether to sync the data to GPU memory
-     * @returns Initialized Tensor with memory Syncd to GPU device
      */
     void rebuild(std::shared_ptr<kp::Tensor> tensor,
-                        bool syncDataToGPU = true)
-    {
-        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+                        bool syncDataToGPU = true);
 
-        if (tensor->isInit()) {
-            tensor->freeMemoryDestroyGPUResources();
-        }
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * single tensor.
+     *
+     * @param tensor Single tensor to destroy
+     */
+    void destroy(std::shared_ptr<kp::Tensor> tensor);
 
-        tensor->init(this->mPhysicalDevice, this->mDevice);
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * vector of tensors.
+     *
+     * @param tensors Vector of tensors to destroy
+     */
+    void destroy(std::vector<std::shared_ptr<kp::Tensor>> tensors);
 
-        std::set<std::shared_ptr<Tensor>>::iterator it =
-          this->mManagedTensors.find(tensor);
-        if (it == this->mManagedTensors.end()) {
-            this->mManagedTensors.insert(tensor);
-        }
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * vector of sequences. Destroying by sequence name is more efficient
+     * and hence recommended instead of by object.
+     *
+     * @param sequences Vector for shared ptrs with sequences to destroy
+     */
+    void destroy(std::vector<std::shared_ptr<kp::Sequence>> sequences);
 
-        if (syncDataToGPU) {
-            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
-        }
-    }
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * single sequence. Destroying by sequence name is more efficient
+     * and hence recommended instead of by object.
+     *
+     * @param sequence Single sequence to destroy
+     */
+    void destroy(std::shared_ptr<kp::Sequence> sequence);
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * sequence by name.
+     *
+     * @param sequenceName Single name of named sequence to destroy
+     */
+    void destroy(const std::string& sequenceName);
+
+    /**
+     * Destroy owned Vulkan GPU resources and free GPU memory for
+     * sequences using vector of named sequence names.
+     *
+     * @param sequenceNames Vector of sequence names to destroy
+     */
+    void destroy(const std::vector<std::string>& sequenceNames);
 
   private:
     // -------------- OPTIONALLY OWNED RESOURCES

From c78a025f9b940c389a564084af086224af03a85d Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 18:42:40 +0000
Subject: [PATCH 5/7] Updated docstrings and workflow

---
 Makefile                  |   6 +
 python/src/docstrings.hpp | 242 +++++++++++++++++++++++---------------
 scripts/requirements.txt  |   1 +
 3 files changed, 154 insertions(+), 95 deletions(-)

diff --git a/Makefile b/Makefile
index da1df2cbd..b2cda7aa7 100644
--- a/Makefile
+++ b/Makefile
@@ -170,6 +170,12 @@ run_ci:
 
 ####### General project commands #######
 
+generate_python_docstrings:
+	python -m pybind11_mkdoc \
+		-o python/src/docstrings.hpp \
+		single_include/kompute/Kompute.hpp \
+		-I/usr/include/c++/7.5.0/
+
 install_python_reqs:
 	python3 -m pip install -r scripts/requirements.txt
 
diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp
index 79b864f8b..36f481b94 100644
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@@ -119,20 +119,48 @@ integrate with the vulkan kompute use.
 @param device Vulkan logical device to use for all base resources
 @param physicalDeviceIndex Index for vulkan physical device used)doc";
 
-static const char *__doc_kp_Manager_tensor =
-R"doc(Function that simplifies the common workflow of tensor creation and
-initialization. It will take the constructor parameters for a Tensor
-and will will us it to create a new Tensor and then create it using
-the OpCreateTensor command.
-
-@param data The data to initialize the tensor with @param tensorType
-The type of tensor to initialize @returns Initialized Tensor with
-memory Syncd to GPU device)doc";
-
 static const char *__doc_kp_Manager_createDevice = R"doc()doc";
 
 static const char *__doc_kp_Manager_createInstance = R"doc()doc";
 
+static const char *__doc_kp_Manager_destroy =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for single
+tensor.
+
+@param tensor Single tensor to destroy)doc";
+
+static const char *__doc_kp_Manager_destroy_2 =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for vector of
+tensors.
+
+@param tensors Vector of tensors to destroy)doc";
+
+static const char *__doc_kp_Manager_destroy_3 =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for vector of
+sequences. Destroying by sequence name is more efficient and hence
+recommended instead of by object.
+
+@param sequences Vector for shared ptrs with sequences to destroy)doc";
+
+static const char *__doc_kp_Manager_destroy_4 =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for single
+sequence. Destroying by sequence name is more efficient and hence
+recommended instead of by object.
+
+@param sequence Single sequence to destroy)doc";
+
+static const char *__doc_kp_Manager_destroy_5 =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for sequence by
+name.
+
+@param sequenceName Single name of named sequence to destroy)doc";
+
+static const char *__doc_kp_Manager_destroy_6 =
+R"doc(Destroy owned Vulkan GPU resources and free GPU memory for sequences
+using vector of named sequence names.
+
+@param sequenceNames Vector of sequence names to destroy)doc";
+
 static const char *__doc_kp_Manager_evalOp =
 R"doc(Function that evaluates operation against named sequence.
 
@@ -178,14 +206,6 @@ R"doc(Function that evaluates operation against a newly created sequence.
 TArgs Template parameters that will be used to initialise Operation to
 allow for extensible configurations on initialisation)doc";
 
-static const char *__doc_kp_Manager_sequence =
-R"doc(Get or create a managed Sequence that will be contained by this
-manager. If the named sequence does not currently exist, it would be
-created and initialised.
-
-@param sequenceName The name for the named sequence to be retrieved or
-created @return Shared pointer to the manager owned sequence resource)doc";
-
 static const char *__doc_kp_Manager_mComputeQueueFamilyIndices = R"doc()doc";
 
 static const char *__doc_kp_Manager_mComputeQueues = R"doc()doc";
@@ -202,10 +222,50 @@ static const char *__doc_kp_Manager_mInstance = R"doc()doc";
 
 static const char *__doc_kp_Manager_mManagedSequences = R"doc()doc";
 
+static const char *__doc_kp_Manager_mManagedTensors = R"doc()doc";
+
 static const char *__doc_kp_Manager_mPhysicalDevice = R"doc()doc";
 
 static const char *__doc_kp_Manager_mPhysicalDeviceIndex = R"doc()doc";
 
+static const char *__doc_kp_Manager_rebuild =
+R"doc(Function that simplifies the common workflow of tensor initialisation.
+It will take the constructor parameters for a Tensor and will use
+them to create a new Tensor. The tensor memory will then be managed and
+owned by the manager.
+
+@param tensors Array of tensors to rebuild @param syncDataToGPU
+Whether to sync the data to GPU memory)doc";
+
+static const char *__doc_kp_Manager_rebuild_2 =
+R"doc(Function that simplifies the common workflow of tensor initialisation.
+It will take the constructor parameters for a Tensor and will use
+them to create a new Tensor. The tensor memory will then be managed and
+owned by the manager.
+
+@param tensors Single tensor to rebuild @param syncDataToGPU Whether
+to sync the data to GPU memory)doc";
+
+static const char *__doc_kp_Manager_sequence =
+R"doc(Get or create a managed Sequence that will be contained by this
+manager. If the named sequence does not currently exist, it would be
+created and initialised.
+
+@param sequenceName The name for the named sequence to be retrieved or
+created @param queueIndex The queue to use from the available queues
+@return Shared pointer to the manager owned sequence resource)doc";
+
+static const char *__doc_kp_Manager_tensor =
+R"doc(Function that simplifies the common workflow of tensor creation and
+initialization. It will take the constructor parameters for a Tensor
+and will use them to create a new Tensor and then initialise it. The
+tensor memory will then be managed and owned by the manager.
+
+@param data The data to initialize the tensor with @param tensorType
+The type of tensor to initialize @param syncDataToGPU Whether to sync
+the data to GPU memory @returns Initialized Tensor with memory Syncd
+to GPU device)doc";
+
 static const char *__doc_kp_OpAlgoBase =
 R"doc(Operation that provides a general abstraction that simplifies the use
 of algorithm and parameter components which can be used with shaders.
@@ -334,8 +394,6 @@ static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutput =
 R"doc(< Reference to the parameter used in the output of the shader and will
 be copied with a staging vector)doc";
 
-static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutputStaging = R"doc(< Staging temporary tensor user do to copy the output of the tensor)doc";
-
 static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorRHS =
 R"doc(< Reference to the parameter used in the right hand side equation of
 the shader)doc";
@@ -371,8 +429,7 @@ sub-components.
 @param physicalDevice Vulkan physical device used to find device
 queues @param device Vulkan logical device for passing to Algorithm
 @param commandBuffer Vulkan Command Buffer to record commands into
-@param tensors Tensors that are to be used in this operation @param
-freeTensors Whether operation manages the memory of the Tensors)doc";
+@param tensors Tensors that are to be used in this operation)doc";
 
 static const char *__doc_kp_OpBase_init =
 R"doc(The init function is responsible for setting up all the resources and
@@ -463,51 +520,15 @@ static const char *__doc_kp_OpTensorCopy_record =
 R"doc(Records the copy commands from the first tensor into all the other
 tensors provided. Also optionally records a barrier.)doc";
 
-static const char *__doc_kp_OpTensorCreate =
-R"doc(Operation that creates tensor and manages the memory of the components
-created)doc";
-
-static const char *__doc_kp_OpTensorCreate_OpTensorCreate = R"doc()doc";
-
-static const char *__doc_kp_OpTensorCreate_OpTensorCreate_2 =
-R"doc(Default constructor with parameters that provides the bare minimum
-requirements for the operations to be able to create and manage their
-sub-components.
-
-@param physicalDevice Vulkan physical device used to find device
-queues @param device Vulkan logical device for passing to Algorithm
-@param commandBuffer Vulkan Command Buffer to record commands into
-@param tensors Tensors that will be used to create in operation.
-@param freeTensors Whether operation manages the memory of the Tensors)doc";
-
-static const char *__doc_kp_OpTensorCreate_init =
-R"doc(In charge of initialising the primary Tensor as well as the staging
-tensor as required. It will only initialise a staging tensor if the
-Primary tensor is of type Device. For staging tensors it performs a
-mapDataIntoHostMemory which would perform immediately as opposed to on
-sequence eval/submission.)doc";
-
-static const char *__doc_kp_OpTensorCreate_mStagingTensors = R"doc()doc";
-
-static const char *__doc_kp_OpTensorCreate_postEval =
-R"doc(Performs a copy back into the main tensor to ensure that the data
-contained is the one that is now being stored in the GPU.)doc";
-
-static const char *__doc_kp_OpTensorCreate_preEval = R"doc(Does not perform any preEval commands.)doc";
-
-static const char *__doc_kp_OpTensorCreate_record =
-R"doc(Record runs the core actions to create the tensors. For device tensors
-it records a copyCommand to move the data from the staging tensor to
-the device tensor. The mapping for staging tensors happens in the init
-function not in the record function.)doc";
-
 static const char *__doc_kp_OpTensorSyncDevice =
 R"doc(Operation that syncs tensor's device by mapping local data into the
-device memory. For TensorTypes::eDevice it will use a staging tensor
-to perform the copy. For TensorTypes::eHost it will only copy the
-data and perform a map, which will be executed during the record (as
-opposed to during the sequence eval/submit). This function cannot be
-carried out for TensorTypes::eHost.)doc";
+device memory. For TensorTypes::eDevice it will use a record operation
+for the memory to be syncd into GPU memory which means that the
+operation will be done in sync with GPU commands. For
+TensorTypes::eStaging it will only map the data into host memory which
+will happen during preEval before the recorded commands are
+dispatched. This operation won't have any effect on
+TensorTypes::eStaging.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc()doc";
 
@@ -523,28 +544,25 @@ queues @param device Vulkan logical device for passing to Algorithm
 
 static const char *__doc_kp_OpTensorSyncDevice_init =
 R"doc(Performs basic checks such as ensuring that there is at least one
-tensor provided, that they are initialized and that they are not of
-type TensorTpes::eHost. For staging tensors in host memory, the map
-is performed during the init function.)doc";
-
-static const char *__doc_kp_OpTensorSyncDevice_mStagingTensors = R"doc()doc";
+tensor provided with min memory of 1 element.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_record =
-R"doc(For device tensors, it records the copy command to the device tensor
-from the temporary staging tensor.)doc";
+R"doc(For device tensors, it records the copy command for the tensor to copy
+the data from its staging to device memory.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal =
-R"doc(Operation that syncs tensor's local data by mapping the data from
-device memory into the local vector. For TensorTypes::eDevice it will
-use a staging tensor to perform the copy. For TensorTypes::eHost it
-will only copy the data and perform a map, which will be executed
-during the postSubmit (there will be no copy during the sequence
-eval/submit). This function cannot be carried out for
-TensorTypes::eHost.)doc";
+R"doc(Operation that syncs tensor's local memory by mapping device data into
+the local CPU memory. For TensorTypes::eDevice it will use a record
+operation for the memory to be syncd into GPU memory which means that
+the operation will be done in sync with GPU commands. For
+TensorTypes::eStaging it will only map the data into host memory which
+will happen during preEval before the recorded commands are
+dispatched. This operation won't have any effect on
+TensorTypes::eStaging.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc()doc";
 
@@ -560,10 +578,7 @@ queues @param device Vulkan logical device for passing to Algorithm
 
 static const char *__doc_kp_OpTensorSyncLocal_init =
 R"doc(Performs basic checks such as ensuring that there is at least one
-tensor provided, that they are initialized and that they are not of
-type TensorTpes::eHost.)doc";
-
-static const char *__doc_kp_OpTensorSyncLocal_mStagingTensors = R"doc()doc";
+tensor provided with min memory of 1 element.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_postEval =
 R"doc(For host tensors it performs the map command from the host memory into
@@ -572,8 +587,8 @@ local memory.)doc";
 static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_record =
-R"doc(For device tensors, it records the copy command into the staging
-tensor from the device tensor.)doc";
+R"doc(For device tensors, it records the copy command for the tensor to copy
+the data from its device to staging memory.)doc";
 
 static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
 
@@ -699,8 +714,9 @@ static const char *__doc_kp_Tensor_Tensor_2 =
 R"doc(Default constructor with data provided which would be used to create
 the respective vulkan buffer and memory.
 
-@param data Vector of data that will be used by the tensor @param
-tensorType Type for the tensor which is of type TensorTypes)doc";
+@param data Non-zero-sized vector of data that will be used by the
+tensor @param tensorType Type for the tensor which is of type
+TensorTypes)doc";
 
 static const char *__doc_kp_Tensor_TensorTypes =
 R"doc(Type for tensors created: Device allows memory to be transferred from
@@ -714,6 +730,10 @@ static const char *__doc_kp_Tensor_TensorTypes_eHost = R"doc(< Type is host memo
 
 static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc";
 
+static const char *__doc_kp_Tensor_allocateBindMemory = R"doc()doc";
+
+static const char *__doc_kp_Tensor_allocateMemoryCreateGPUResources = R"doc()doc";
+
 static const char *__doc_kp_Tensor_constructDescriptorBufferInfo =
 R"doc(Constructs a vulkan descriptor buffer info which can be used to
 specify and reference the underlying buffer component of the tensor
@@ -721,6 +741,8 @@ without exposing it.
 
 @return Descriptor buffer info with own buffer)doc";
 
+static const char *__doc_kp_Tensor_copyBuffer = R"doc()doc";
+
 static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
 
 static const char *__doc_kp_Tensor_data =
@@ -735,9 +757,13 @@ static const char *__doc_kp_Tensor_freeMemoryDestroyGPUResources =
 R"doc(Destroys and frees the GPU resources which include the buffer and
 memory.)doc";
 
-static const char *__doc_kp_Tensor_getBufferUsageFlags = R"doc()doc";
+static const char *__doc_kp_Tensor_getPrimaryBufferUsageFlags = R"doc()doc";
 
-static const char *__doc_kp_Tensor_getMemoryPropertyFlags = R"doc()doc";
+static const char *__doc_kp_Tensor_getPrimaryMemoryPropertyFlags = R"doc()doc";
+
+static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";
+
+static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";
 
 static const char *__doc_kp_Tensor_init =
 R"doc(Initialiser which calls the initialisation for all the respective
@@ -751,24 +777,32 @@ R"doc(Returns true if the tensor initialisation function has been carried
 out successful, which would mean that the buffer and memory will have
 been provisioned.)doc";
 
-static const char *__doc_kp_Tensor_mBuffer = R"doc()doc";
-
 static const char *__doc_kp_Tensor_mData = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mFreeBuffer = R"doc()doc";
+static const char *__doc_kp_Tensor_mFreePrimaryBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mFreeMemory = R"doc()doc";
+static const char *__doc_kp_Tensor_mFreePrimaryMemory = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mFreeStagingBuffer = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mFreeStagingMemory = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mIsInit = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mMemory = R"doc()doc";
-
 static const char *__doc_kp_Tensor_mPhysicalDevice = R"doc()doc";
 
+static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
+
 static const char *__doc_kp_Tensor_mShape = R"doc()doc";
 
+static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
+
 static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mapDataFromHostMemory =
@@ -809,6 +843,24 @@ a staging buffer transfer, or to gather output (between others).
 createBarrier Whether to create a barrier that ensures the data is
 copied before further operations. Default is true.)doc";
 
+static const char *__doc_kp_Tensor_recordCopyFromDeviceToStaging =
+R"doc(Records a copy from the internal device memory to the staging memory
+using an optional barrier to wait for the operation. This function
+would only be relevant for kp::Tensors of type eDevice.
+
+@param commandBuffer Vulkan Command Buffer to record the commands into
+@param createBarrier Whether to create a barrier that ensures the data
+is copied before further operations. Default is true.)doc";
+
+static const char *__doc_kp_Tensor_recordCopyFromStagingToDevice =
+R"doc(Records a copy from the internal staging memory to the device memory
+using an optional barrier to wait for the operation. This function
+would only be relevant for kp::Tensors of type eDevice.
+
+@param commandBuffer Vulkan Command Buffer to record the commands into
+@param createBarrier Whether to create a barrier that ensures the data
+is copied before further operations. Default is true.)doc";
+
 static const char *__doc_kp_Tensor_setData =
 R"doc(Sets / resets the vector data of the tensor. This function does not
 perform any copies into GPU memory and is only performed on the host.)doc";
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 6f31cb33d..4da042504 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -8,3 +8,4 @@ Sphinx==3.2.1
 sphinx_material==0.0.30
 breathe==4.20.0
 m2r2==0.2.5
+git+https://github.com/pybind/pybind11_mkdoc.git@master  # https transport: GitHub no longer serves unauthenticated git://

From f31aceea57f3e5812b9374dc167163387fbe8871 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 18:42:52 +0000
Subject: [PATCH 6/7] Added destroy and rebuild functions to python

---
 python/src/main.cpp | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index 889084c7e..176b6e4e0 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -88,25 +88,24 @@ PYBIND11_MODULE(kp, m) {
 
 
     py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
-        .def("init", &kp::Sequence::init, "Initialises Vulkan resources within sequence using provided device.")
+        .def("init", &kp::Sequence::init, DOC(kp, Sequence, init))
         
         // record
-        .def("begin", &kp::Sequence::begin, "Clears previous commands and starts recording commands in sequence which can be run in batch.")
-        .def("end", &kp::Sequence::end, "Stops listening and recording for new commands.")
+        .def("begin", &kp::Sequence::begin, DOC(kp, Sequence, begin))
+        .def("end", &kp::Sequence::end, DOC(kp, Sequence, end))
         
         // eval
-        .def("eval", &kp::Sequence::eval, "Executes the currently recorded commands synchronously by waiting on Vulkan Fence.")
-        .def("eval_async", &kp::Sequence::evalAsync, "Executes the currently recorded commands asynchronously.")
-        .def("eval_await", &kp::Sequence::evalAwait, "Waits until the execution finishes using Vulkan Fence.")
+        .def("eval", &kp::Sequence::eval, DOC(kp, Sequence, eval))
+        .def("eval_async", &kp::Sequence::evalAsync, DOC(kp, Sequence, evalAsync))
+        .def("eval_await", &kp::Sequence::evalAwait, DOC(kp, Sequence, evalAwait))
         
         // status
-        .def("is_running", &kp::Sequence::isRunning, "Checks whether the Sequence operations are currently still executing.")
-        .def("is_rec", &kp::Sequence::isRecording, "Checks whether the Sequence is currently in recording mode.")
-        .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")
+        .def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning))
+        .def("is_rec", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording))
+        .def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit))
         
         // record
-        .def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>,
-            "Records operation to copy one tensor to one or many tensors")
+        .def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>, DOC(kp, Sequence, record))
         .def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
             "Records operation to sync tensor from local memory to GPU memory")
         .def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
@@ -166,7 +165,19 @@ PYBIND11_MODULE(kp, m) {
         .def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
                 py::arg("tensor"), py::arg("syncDataToGPU") = true,
                 "Build and initialise tensor")
-        
+        .def("destroy", py::overload_cast<std::shared_ptr<kp::Tensor>>(&kp::Manager::destroy),
+                py::arg("tensor"), DOC(kp, Manager, destroy))
+        .def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>>(&kp::Manager::destroy),
+                py::arg("tensors"), DOC(kp, Manager, destroy, 2))
+        .def("destroy", py::overload_cast<std::vector<std::shared_ptr<kp::Sequence>>>(&kp::Manager::destroy),
+                py::arg("sequences"), DOC(kp, Manager, destroy, 3))
+        .def("destroy", py::overload_cast<std::shared_ptr<kp::Sequence>>(&kp::Manager::destroy),
+                py::arg("sequence"), DOC(kp, Manager, destroy, 4))
+        .def("destroy", py::overload_cast<const std::string &>(&kp::Manager::destroy),
+                py::arg("sequenceName"), DOC(kp, Manager, destroy, 5))
+        .def("destroy", py::overload_cast<const std::vector<std::string>&>(&kp::Manager::destroy),
+                py::arg("sequenceNames"), DOC(kp, Manager, destroy, 6))
+
         // Await functions
         .def("eval_await", &kp::Manager::evalOpAwait,
                 py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX,

From 09fcba1aeed5b7ce854188b58d4f6577d0f2ba4f Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 10 Feb 2021 18:55:11 +0000
Subject: [PATCH 7/7] Updated python tests

---
 python/test/test_kompute.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py
index 7050b9c22..e2cbb72c9 100644
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@@ -109,10 +109,19 @@ def test_sequence():
     seq.end()
     seq.eval()
 
+    mgr.destroy("op")
+
+    assert seq.is_init() == False
+
     assert tensor_out.data() == [2.0, 4.0, 6.0]
     assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])
 
+    mgr.destroy(tensor_in_a)
+    mgr.destroy([tensor_in_b, tensor_out])
 
+    assert tensor_in_a.is_init() == False
+    assert tensor_in_b.is_init() == False
+    assert tensor_out.is_init() == False
 
 def test_workgroup():
     mgr = kp.Manager(0)
@@ -146,8 +155,17 @@ def test_workgroup():
     seq.end()
     seq.eval()
 
+    mgr.destroy(seq)
+
+    assert seq.is_init() == False
+
     mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
 
     assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
     assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())
 
+    mgr.destroy([tensor_a, tensor_b])
+
+    assert tensor_a.is_init() == False
+    assert tensor_b.is_init() == False
+