From 860fda9fb526d2350f550964b78644236b20482e Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 09:37:50 +0100
Subject: [PATCH 01/19] Initial implementation

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 single_include/kompute/Kompute.hpp        | 120 ++++++++++++++++++++--
 src/Algorithm.cpp                         | 114 +++++---------------
 src/Manager.cpp                           |   1 +
 src/OpAlgoDispatch.cpp                    |   2 +-
 src/include/kompute/Algorithm.hpp         | 118 +++++++++++++++++++--
 src/include/kompute/operations/OpMult.hpp |   2 +-
 6 files changed, 247 insertions(+), 110 deletions(-)
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 12fe9cda9..a68cff1e6 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1094,12 +1094,30 @@ class Algorithm
      * these can be modified but all new values must have the same vector size
      * as this initial value.
      */
+    template<typename S = float, typename P = float>
     Algorithm(std::shared_ptr<vk::Device> device,
               const std::vector<std::shared_ptr<Tensor>>& tensors = {},
               const std::vector<uint32_t>& spirv = {},
               const Workgroup& workgroup = {},
-              const Constants& specializationConstants = {},
-              const Constants& pushConstants = {});
+              const std::vector<S>& specializationConstants = {},
+              const std::vector<P>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
+
+        this->mDevice = device;
+
+        if (tensors.size() && spirv.size()) {
+            KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
+                        "spirv size: {}",
+                        tensors.size(),
+                        spirv.size());
+            this->rebuild(
+              tensors, spirv, workgroup, specializationConstants, pushConstants);
+        } else {
+            KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
+                        "spirv so not rebuilding vulkan components");
+        }
+    }
 
     /**
      *  Rebuild function to reconstruct algorithm with configuration parameters
@@ -1116,11 +1134,57 @@ class Algorithm
      * these can be modified but all new values must have the same vector size
      * as this initial value.
      */
+    template<typename S = float, typename P = float>
     void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
                  const std::vector<uint32_t>& spirv,
                  const Workgroup& workgroup = {},
-                 const Constants& specializationConstants = {},
-                 const Constants& pushConstants = {});
+                 const std::vector<S>& specializationConstants = {},
+                 const std::vector<P>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute Algorithm rebuild started");
+
+        this->mTensors = tensors;
+        this->mSpirv = spirv;
+
+        if (specializationConstants.size()) {
+            if (this->mSpecializationConstantsData) {
+                free(this->mSpecializationConstantsData);
+            }
+            uint32_t memorySize = sizeof(decltype(specializationConstants.back()));
+            uint32_t size = specializationConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mSpecializationConstantsData = malloc(totalSize);
+            memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize);
+            this->mSpecializationConstantsDataTypeMemorySize = memorySize;
+            this->mSpecializationConstantsSize = size;
+        }
+
+        if (pushConstants.size()) {
+            if (this->mPushConstantsData) {
+                free(this->mPushConstantsData);
+            }
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+
+        this->setWorkgroup(workgroup,
+                           this->mTensors.size() ? this->mTensors[0]->size() : 1);
+
+        // Descriptor pool is created first so if available then destroy all before
+        // rebuild
+        if (this->isInit()) {
+            this->destroy();
+        }
+
+        this->createParameters();
+        this->createShaderModule();
+        this->createPipeline();
+    }
 
     /**
      * Destructor for Algorithm which is responsible for freeing and desroying
@@ -1179,7 +1243,29 @@ class Algorithm
      * next bindPush(...) calls. The constants provided must be of the same size
      * as the ones created during initialization.
      */
-    void setPush(const Constants& pushConstants);
+    template<typename T>
+    void setPushConstants(const std::vector<T>& pushConstants)
+    {
+
+        if (pushConstants.size() != this->mPushConstantsSize) {
+            throw std::runtime_error(
+              fmt::format("Kompute Algorithm push "
+                          "constant provided is size {} but expected size {}",
+                          pushConstants.size(),
+                          this->mPushConstantsSize));
+        }
+        if (this->mPushConstantsData) {
+            free(this->mPushConstantsData);
+        }
+
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = size * memorySize;
+        this->mPushConstantsData = malloc(totalSize);
+        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        this->mPushConstantsDataTypeMemorySize = memorySize;
+        this->mPushConstantsSize = size;
+    }
 
     /**
      * Gets the current workgroup from the algorithm.
@@ -1194,13 +1280,23 @@ class Algorithm
      *
      * @returns The kp::Constants currently set for specialization constants
      */
-    const Constants& getSpecializationConstants();
+    template<typename T>
+    const std::vector<T> getSpecializationConstants()
+    {
+        return { (T*)this->mSpecializationConstantsData,
+            ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize };
+    }
     /**
      * Gets the specialization constants of the current algorithm.
      *
      * @returns The kp::Constants currently set for push constants
      */
-    const Constants& getPush();
+    template<typename T>
+    const std::vector<T> getPushConstants()
+    {
+        return { (T*)this->mPushConstantsData,
+            ((T*)this->mPushConstantsData) + this->mPushConstantsSize };
+    }
     /**
      * Gets the current tensors that are used in the algorithm.
      *
@@ -1233,8 +1329,12 @@ class Algorithm
 
     // -------------- ALWAYS OWNED RESOURCES
     std::vector<uint32_t> mSpirv;
-    Constants mSpecializationConstants;
-    Constants mPushConstants;
+    void* mSpecializationConstantsData = nullptr;
+    uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
+    uint32_t mSpecializationConstantsSize = 0;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
     Workgroup mWorkgroup;
 
     // Create util functions
@@ -1655,7 +1755,7 @@ class OpMult : public OpAlgoDispatch
           (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
             kp::shader_data::shaders_glsl_opmult_comp_spv_len));
 
-        algorithm->rebuild(tensors, spirv);
+        algorithm->rebuild<>(tensors, spirv);
     }
 
     /**
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index 8d510bb9c..69ab5f7ad 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -5,30 +5,6 @@
 
 namespace kp {
 
-Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
-                     const std::vector<std::shared_ptr<Tensor>>& tensors,
-                     const std::vector<uint32_t>& spirv,
-                     const Workgroup& workgroup,
-                     const Constants& specializationConstants,
-                     const Constants& pushConstants)
-{
-    KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
-
-    this->mDevice = device;
-
-    if (tensors.size() && spirv.size()) {
-        KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
-                    "spirv size: {}",
-                    tensors.size(),
-                    spirv.size());
-        this->rebuild(
-          tensors, spirv, workgroup, specializationConstants, pushConstants);
-    } else {
-        KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
-                    "spirv so not rebuilding vulkan components");
-    }
-}
-
 Algorithm::~Algorithm()
 {
     KP_LOG_DEBUG("Kompute Algorithm Destructor started");
@@ -36,33 +12,6 @@ Algorithm::~Algorithm()
     this->destroy();
 }
 
-void
-Algorithm::rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
-                   const std::vector<uint32_t>& spirv,
-                   const Workgroup& workgroup,
-                   const Constants& specializationConstants,
-                   const Constants& pushConstants)
-{
-    KP_LOG_DEBUG("Kompute Algorithm rebuild started");
-
-    this->mTensors = tensors;
-    this->mSpirv = spirv;
-    this->mSpecializationConstants = specializationConstants;
-    this->mPushConstants = pushConstants;
-    this->setWorkgroup(workgroup,
-                       this->mTensors.size() ? this->mTensors[0]->size() : 1);
-
-    // Descriptor pool is created first so if available then destroy all before
-    // rebuild
-    if (this->isInit()) {
-        this->destroy();
-    }
-
-    this->createParameters();
-    this->createShaderModule();
-    this->createPipeline();
-}
-
 bool
 Algorithm::isInit()
 {
@@ -74,6 +23,13 @@ Algorithm::isInit()
 void
 Algorithm::destroy()
 {
+    if (this->mPushConstantsData) {
+        free(this->mPushConstantsData);
+    }
+
+    if (this->mSpecializationConstantsData) {
+        free(this->mSpecializationConstantsData);
+    }
 
     if (!this->mDevice) {
         KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
@@ -279,10 +235,10 @@ Algorithm::createPipeline()
       this->mDescriptorSetLayout.get());
 
     vk::PushConstantRange pushConstantRange;
-    if (this->mPushConstants.size()) {
+    if (this->mPushConstantsSize) {
         pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
         pushConstantRange.setOffset(0);
-        pushConstantRange.setSize(sizeof(float) * this->mPushConstants.size());
+        pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize);
 
         pipelineLayoutInfo.setPushConstantRangeCount(1);
         pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
@@ -295,11 +251,11 @@ Algorithm::createPipeline()
 
     std::vector<vk::SpecializationMapEntry> specializationEntries;
 
-    for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) {
+    for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
         vk::SpecializationMapEntry specializationEntry(
           static_cast<uint32_t>(i),
-          static_cast<uint32_t>(sizeof(float) * i),
-          sizeof(float));
+          static_cast<uint32_t>(this->mSpecializationConstantsDataTypeMemorySize * i),
+          this->mSpecializationConstantsDataTypeMemorySize);
 
         specializationEntries.push_back(specializationEntry);
     }
@@ -309,8 +265,8 @@ Algorithm::createPipeline()
     vk::SpecializationInfo specializationInfo(
       static_cast<uint32_t>(specializationEntries.size()),
       specializationEntries.data(),
-      sizeof(float) * this->mSpecializationConstants.size(),
-      this->mSpecializationConstants.data());
+      this->mSpecializationConstantsDataTypeMemorySize * this->mSpecializationConstantsSize,
+      this->mSpecializationConstantsData);
 
     vk::PipelineShaderStageCreateInfo shaderStage(
       vk::PipelineShaderStageCreateFlags(),
@@ -381,15 +337,22 @@ Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
 void
 Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
 {
-    if (this->mPushConstants.size()) {
+    if (this->mPushConstantsSize) {
         KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
-                     this->mPushConstants.size());
+                     this->mPushConstantsSize);
+        KP_LOG_DEBUG("{} {}",
+                     this->mPushConstantsDataTypeMemorySize,
+                     this->mPushConstantsData == nullptr);
+        KP_LOG_DEBUG("{}",
+                     ((float*)this->mPushConstantsData)[0]);
 
         commandBuffer.pushConstants(*this->mPipelineLayout,
                                     vk::ShaderStageFlagBits::eCompute,
                                     0,
-                                    this->mPushConstants.size() * sizeof(float),
-                                    this->mPushConstants.data());
+                                    this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize,
+                                    this->mPushConstantsData);
+        KP_LOG_DEBUG("Constants bound: {}",
+                     this->mPushConstantsSize);
     }
 }
 
@@ -426,39 +389,12 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
                 this->mWorkgroup[2]);
 }
 
-void
-Algorithm::setPush(const Constants& pushConstants)
-{
-
-    if (pushConstants.size() != this->mPushConstants.size()) {
-        throw std::runtime_error(
-          fmt::format("Kompute Algorithm push "
-                      "constant provided is size {} but expected size {}",
-                      pushConstants.size(),
-                      this->mPushConstants.size()));
-    }
-
-    this->mPushConstants = pushConstants;
-}
-
 const Workgroup&
 Algorithm::getWorkgroup()
 {
     return this->mWorkgroup;
 }
 
-const Constants&
-Algorithm::getSpecializationConstants()
-{
-    return this->mSpecializationConstants;
-}
-
-const Constants&
-Algorithm::getPush()
-{
-    return this->mPushConstants;
-}
-
 const std::vector<std::shared_ptr<Tensor>>&
 Algorithm::getTensors()
 {
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 80f308983..8e8367c30 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -422,6 +422,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
     KP_LOG_DEBUG("Kompute Manager compute queue obtained");
 }
 
+// TODO: Update to template
 std::shared_ptr<Algorithm>
 Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
                    const std::vector<uint32_t>& spirv,
diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp
index 0fd323b7d..15bfc05c9 100644
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@@ -36,7 +36,7 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
     }
 
     if (this->mPushConstants.size()) {
-        this->mAlgorithm->setPush(this->mPushConstants);
+        this->mAlgorithm->setPushConstants(this->mPushConstants);
     }
 
     this->mAlgorithm->recordBindCore(commandBuffer);
diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index 2ec2797a8..6bc49cef6 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -31,12 +31,30 @@ class Algorithm
      * these can be modified but all new values must have the same vector size
      * as this initial value.
      */
+    template<typename S = float, typename P = float>
     Algorithm(std::shared_ptr<vk::Device> device,
               const std::vector<std::shared_ptr<Tensor>>& tensors = {},
               const std::vector<uint32_t>& spirv = {},
               const Workgroup& workgroup = {},
-              const Constants& specializationConstants = {},
-              const Constants& pushConstants = {});
+              const std::vector<S>& specializationConstants = {},
+              const std::vector<P>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
+
+        this->mDevice = device;
+
+        if (tensors.size() && spirv.size()) {
+            KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
+                        "spirv size: {}",
+                        tensors.size(),
+                        spirv.size());
+            this->rebuild(
+              tensors, spirv, workgroup, specializationConstants, pushConstants);
+        } else {
+            KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
+                        "spirv so not rebuilding vulkan components");
+        }
+    }
 
     /**
      *  Rebuild function to reconstruct algorithm with configuration parameters
@@ -53,11 +71,57 @@ class Algorithm
      * these can be modified but all new values must have the same vector size
      * as this initial value.
      */
+    template<typename S = float, typename P = float>
     void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
                  const std::vector<uint32_t>& spirv,
                  const Workgroup& workgroup = {},
-                 const Constants& specializationConstants = {},
-                 const Constants& pushConstants = {});
+                 const std::vector<S>& specializationConstants = {},
+                 const std::vector<P>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute Algorithm rebuild started");
+
+        this->mTensors = tensors;
+        this->mSpirv = spirv;
+
+        if (specializationConstants.size()) {
+            if (this->mSpecializationConstantsData) {
+                free(this->mSpecializationConstantsData);
+            }
+            uint32_t memorySize = sizeof(decltype(specializationConstants.back()));
+            uint32_t size = specializationConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mSpecializationConstantsData = malloc(totalSize);
+            memcpy(this->mSpecializationConstantsData, specializationConstants.data(), totalSize);
+            this->mSpecializationConstantsDataTypeMemorySize = memorySize;
+            this->mSpecializationConstantsSize = size;
+        }
+
+        if (pushConstants.size()) {
+            if (this->mPushConstantsData) {
+                free(this->mPushConstantsData);
+            }
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+
+        this->setWorkgroup(workgroup,
+                           this->mTensors.size() ? this->mTensors[0]->size() : 1);
+
+        // Descriptor pool is created first so if available then destroy all before
+        // rebuild
+        if (this->isInit()) {
+            this->destroy();
+        }
+
+        this->createParameters();
+        this->createShaderModule();
+        this->createPipeline();
+    }
 
     /**
      * Destructor for Algorithm which is responsible for freeing and desroying
@@ -116,7 +180,29 @@ class Algorithm
      * next bindPush(...) calls. The constants provided must be of the same size
      * as the ones created during initialization.
      */
-    void setPush(const Constants& pushConstants);
+    template<typename T>
+    void setPushConstants(const std::vector<T>& pushConstants)
+    {
+
+        if (pushConstants.size() != this->mPushConstantsSize) {
+            throw std::runtime_error(
+              fmt::format("Kompute Algorithm push "
+                          "constant provided is size {} but expected size {}",
+                          pushConstants.size(),
+                          this->mPushConstantsSize));
+        }
+        if (this->mPushConstantsData) {
+            free(this->mPushConstantsData);
+        }
+
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = size * memorySize;
+        this->mPushConstantsData = malloc(totalSize);
+        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        this->mPushConstantsDataTypeMemorySize = memorySize;
+        this->mPushConstantsSize = size;
+    }
 
     /**
      * Gets the current workgroup from the algorithm.
@@ -131,13 +217,23 @@ class Algorithm
      *
      * @returns The kp::Constants currently set for specialization constants
      */
-    const Constants& getSpecializationConstants();
+    template<typename T>
+    const std::vector<T> getSpecializationConstants()
+    {
+        return { (T*)this->mSpecializationConstantsData,
+            ((T*)this->mSpecializationConstantsData) + this->mSpecializationConstantsSize };
+    }
     /**
      * Gets the specialization constants of the current algorithm.
      *
      * @returns The kp::Constants currently set for push constants
      */
-    const Constants& getPush();
+    template<typename T>
+    const std::vector<T> getPushConstants()
+    {
+        return { (T*)this->mPushConstantsData,
+            ((T*)this->mPushConstantsData) + this->mPushConstantsSize };
+    }
     /**
      * Gets the current tensors that are used in the algorithm.
      *
@@ -170,8 +266,12 @@ class Algorithm
 
     // -------------- ALWAYS OWNED RESOURCES
     std::vector<uint32_t> mSpirv;
-    Constants mSpecializationConstants;
-    Constants mPushConstants;
+    void* mSpecializationConstantsData = nullptr;
+    uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
+    uint32_t mSpecializationConstantsSize = 0;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
     Workgroup mWorkgroup;
 
     // Create util functions
diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp
index 97b29cad9..2d6b88057 100644
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@@ -45,7 +45,7 @@ class OpMult : public OpAlgoDispatch
           (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
             kp::shader_data::shaders_glsl_opmult_comp_spv_len));
 
-        algorithm->rebuild(tensors, spirv);
+        algorithm->rebuild<>(tensors, spirv);
     }
 
     /**

From 2e1275e085ad4ef2b61d3b4b77a199724e0f6d6b Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 09:42:07 +0100
Subject: [PATCH 02/19] Cleanup of logging

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 src/Algorithm.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index 69ab5f7ad..a59f34f75 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -340,19 +340,12 @@ Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
     if (this->mPushConstantsSize) {
         KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
                      this->mPushConstantsSize);
-        KP_LOG_DEBUG("{} {}",
-                     this->mPushConstantsDataTypeMemorySize,
-                     this->mPushConstantsData == nullptr);
-        KP_LOG_DEBUG("{}",
-                     ((float*)this->mPushConstantsData)[0]);
 
         commandBuffer.pushConstants(*this->mPipelineLayout,
                                     vk::ShaderStageFlagBits::eCompute,
                                     0,
                                     this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize,
                                     this->mPushConstantsData);
-        KP_LOG_DEBUG("Constants bound: {}",
-                     this->mPushConstantsSize);
     }
 }
 

From c23573eb47e4f9684baf9f3da4654a62c431c62e Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 09:51:36 +0100
Subject: [PATCH 03/19] Added template function for algorithm on manager

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 single_include/kompute/Kompute.hpp | 39 ++++++++++++++++++++++++++----
 src/Manager.cpp                    | 26 --------------------
 src/include/kompute/Manager.hpp    | 39 ++++++++++++++++++++++++++----
 3 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index a68cff1e6..97385e4fc 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -2181,6 +2181,16 @@ class Manager
         return tensor;
     }
 
+    std::shared_ptr<Algorithm> algorithm(
+      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+      const std::vector<uint32_t>& spirv = {},
+      const Workgroup& workgroup = {},
+      const std::vector<float>& specializationConstants = {},
+      const std::vector<float>& pushConstants = {})
+    {
+        return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
@@ -2195,12 +2205,31 @@ class Manager
      * and defaults to an empty constant
      * @returns Shared pointer with initialised algorithm
      */
+    template<typename S = float, typename P = float>
     std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-      const std::vector<uint32_t>& spirv = {},
-      const Workgroup& workgroup = {},
-      const Constants& specializationConstants = {},
-      const Constants& pushConstants = {});
+      const std::vector<std::shared_ptr<Tensor>>& tensors,
+      const std::vector<uint32_t>& spirv,
+      const Workgroup& workgroup,
+      const std::vector<S>& specializationConstants,
+      const std::vector<P>& pushConstants)
+    {
+
+        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
+
+        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
+          this->mDevice,
+          tensors,
+          spirv,
+          workgroup,
+          specializationConstants,
+          pushConstants) };
+
+        if (this->mManageResources) {
+            this->mManagedAlgorithms.push_back(algorithm);
+        }
+
+        return algorithm;
+    }
 
     /**
      * Destroy the GPU resources and all managed resources by manager.
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 8e8367c30..a553d667f 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -422,32 +422,6 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
     KP_LOG_DEBUG("Kompute Manager compute queue obtained");
 }
 
-// TODO: Update to template
-std::shared_ptr<Algorithm>
-Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
-                   const std::vector<uint32_t>& spirv,
-                   const Workgroup& workgroup,
-                   const Constants& specializationConstants,
-                   const Constants& pushConstants)
-{
-
-    KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
-
-    std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
-      this->mDevice,
-      tensors,
-      spirv,
-      workgroup,
-      specializationConstants,
-      pushConstants) };
-
-    if (this->mManageResources) {
-        this->mManagedAlgorithms.push_back(algorithm);
-    }
-
-    return algorithm;
-}
-
 std::shared_ptr<Sequence>
 Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
 {
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index 62f98d6d5..8a4244a2b 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -123,6 +123,16 @@ class Manager
         return tensor;
     }
 
+    std::shared_ptr<Algorithm> algorithm(
+      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+      const std::vector<uint32_t>& spirv = {},
+      const Workgroup& workgroup = {},
+      const std::vector<float>& specializationConstants = {},
+      const std::vector<float>& pushConstants = {})
+    {
+        return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
@@ -137,12 +147,31 @@ class Manager
      * and defaults to an empty constant
      * @returns Shared pointer with initialised algorithm
      */
+    template<typename S = float, typename P = float>
     std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-      const std::vector<uint32_t>& spirv = {},
-      const Workgroup& workgroup = {},
-      const Constants& specializationConstants = {},
-      const Constants& pushConstants = {});
+      const std::vector<std::shared_ptr<Tensor>>& tensors,
+      const std::vector<uint32_t>& spirv,
+      const Workgroup& workgroup,
+      const std::vector<S>& specializationConstants,
+      const std::vector<P>& pushConstants)
+    {
+
+        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
+
+        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
+          this->mDevice,
+          tensors,
+          spirv,
+          workgroup,
+          specializationConstants,
+          pushConstants) };
+
+        if (this->mManageResources) {
+            this->mManagedAlgorithms.push_back(algorithm);
+        }
+
+        return algorithm;
+    }
 
     /**
      * Destroy the GPU resources and all managed resources by manager.

From 858a70d9b8bf387070fd6c94a3553d573e6fdd8e Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 11:31:32 +0100
Subject: [PATCH 04/19] Added tests for push constants of all and mixed types

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 Makefile                                      |   8 +-
 single_include/kompute/Kompute.hpp            |  42 +++-
 src/Algorithm.cpp                             |   4 +-
 src/OpAlgoDispatch.cpp                        |  20 +-
 src/include/kompute/Algorithm.hpp             |  20 +-
 .../kompute/operations/OpAlgoDispatch.hpp     |  22 +-
 test/TestPushConstant.cpp                     | 223 ++++++++++++++++++
 7 files changed, 304 insertions(+), 35 deletions(-)

diff --git a/Makefile b/Makefile
index b6ff3ea58..0f23f75ec 100644
--- a/Makefile
+++ b/Makefile
@@ -12,8 +12,8 @@ VERSION := $(shell cat ./VERSION)
 VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake"
 VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake"
 
-# Regext to pass to catch2 to filter tests
-FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps"
+# These are the tests that don't work with swiftshader but can be run directly with vulkan
+FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble"
 
 ifeq ($(OS),Windows_NT)     # is Windows_NT on XP, 2000, 7, Vista, 10...
 	CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"
@@ -105,7 +105,7 @@ mk_run_tests_cpu: mk_build_swiftshader_library mk_build_tests mk_run_tests_cpu_o
 VS_BUILD_TYPE ?= "Debug"
 # Run with multiprocessin / parallel build by default
 VS_CMAKE_EXTRA_FLAGS ?= ""
-VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MP" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
+VS_KOMPUTE_EXTRA_CXX_FLAGS ?= "/MT" # /MP is for faster multiprocessing builds. You should add "/MT" for submodule builds for compatibility with gtest
 VS_INSTALL_PATH ?= "build/src/CMakeFiles/Export/" # Set to "" if prefer default
 
 vs_cmake:
@@ -116,7 +116,7 @@ vs_cmake:
 		-DKOMPUTE_EXTRA_CXX_FLAGS=$(VS_KOMPUTE_EXTRA_CXX_FLAGS) \
 		-DCMAKE_INSTALL_PREFIX=$(VS_INSTALL_PATH) \
 		-DKOMPUTE_OPT_INSTALL=1 \
-		-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=0 \
+		-DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1 \
 		-DKOMPUTE_OPT_BUILD_TESTS=1 \
 		-DKOMPUTE_OPT_BUILD_SHADERS=1 \
 		-DKOMPUTE_OPT_BUILD_SINGLE_HEADER=1 \
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 97385e4fc..8cbfd404f 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1246,23 +1246,29 @@ class Algorithm
     template<typename T>
     void setPushConstants(const std::vector<T>& pushConstants)
     {
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = memorySize * size;
+        uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
 
-        if (pushConstants.size() != this->mPushConstantsSize) {
+        if (totalSize != previousTotalSize) {
             throw std::runtime_error(
               fmt::format("Kompute Algorithm push "
-                          "constant provided is size {} but expected size {}",
-                          pushConstants.size(),
-                          this->mPushConstantsSize));
+                          "constant total memory size provided is {} but expected {} bytes",
+                          totalSize,
+                          previousTotalSize));
         }
         if (this->mPushConstantsData) {
             free(this->mPushConstantsData);
         }
 
-        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
-        uint32_t size = pushConstants.size();
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
         uint32_t totalSize = size * memorySize;
         this->mPushConstantsData = malloc(totalSize);
-        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        memcpy(this->mPushConstantsData, data, totalSize);
         this->mPushConstantsDataTypeMemorySize = memorySize;
         this->mPushConstantsSize = size;
     }
@@ -1675,8 +1681,24 @@ class OpAlgoDispatch : public OpBase
      * @param algorithm The algorithm object to use for dispatch
      * @param pushConstants The push constants to use for override
      */
+    template<typename T = float>
     OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-            const kp::Constants& pushConstants = {});
+            const std::vector<T>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
+
+        this->mAlgorithm = algorithm;
+
+        if (pushConstants.size()) {
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+    }
 
     /**
      * Default destructor, which is in charge of destroying the algorithm
@@ -1713,7 +1735,9 @@ class OpAlgoDispatch : public OpBase
 private:
     // -------------- ALWAYS OWNED RESOURCES
     std::shared_ptr<Algorithm> mAlgorithm;
-    Constants mPushConstants;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
 };
 
 } // End namespace kp
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index a59f34f75..9179cffbd 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -338,8 +338,8 @@ void
 Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
 {
     if (this->mPushConstantsSize) {
-        KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
-                     this->mPushConstantsSize);
+        KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
+                     this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize);
 
         commandBuffer.pushConstants(*this->mPipelineLayout,
                                     vk::ShaderStageFlagBits::eCompute,
diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp
index 15bfc05c9..c6099ff85 100644
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@@ -5,18 +5,13 @@
 
 namespace kp {
 
-OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-                               const kp::Constants& pushConstants)
-{
-    KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
-
-    this->mAlgorithm = algorithm;
-    this->mPushConstants = pushConstants;
-}
-
 OpAlgoDispatch::~OpAlgoDispatch()
 {
     KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
+
+    if (this->mPushConstantsData) {
+        free(this->mPushConstantsData);
+    }
 }
 
 void
@@ -35,8 +30,11 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
           vk::PipelineStageFlagBits::eComputeShader);
     }
 
-    if (this->mPushConstants.size()) {
-        this->mAlgorithm->setPushConstants(this->mPushConstants);
+    if (this->mPushConstantsSize) {
+        this->mAlgorithm->setPushConstants(
+                this->mPushConstantsData,
+                this->mPushConstantsSize,
+                this->mPushConstantsDataTypeMemorySize);
     }
 
     this->mAlgorithm->recordBindCore(commandBuffer);
diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index 6bc49cef6..a0b2ba146 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -183,23 +183,29 @@ class Algorithm
     template<typename T>
     void setPushConstants(const std::vector<T>& pushConstants)
     {
+        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+        uint32_t size = pushConstants.size();
+        uint32_t totalSize = memorySize * size;
+        uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
 
-        if (pushConstants.size() != this->mPushConstantsSize) {
+        if (totalSize != previousTotalSize) {
             throw std::runtime_error(
               fmt::format("Kompute Algorithm push "
-                          "constant provided is size {} but expected size {}",
-                          pushConstants.size(),
-                          this->mPushConstantsSize));
+                          "constant total memory size provided is {} but expected {} bytes",
+                          totalSize,
+                          previousTotalSize));
         }
         if (this->mPushConstantsData) {
             free(this->mPushConstantsData);
         }
 
-        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
-        uint32_t size = pushConstants.size();
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
         uint32_t totalSize = size * memorySize;
         this->mPushConstantsData = malloc(totalSize);
-        memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+        memcpy(this->mPushConstantsData, data, totalSize);
         this->mPushConstantsDataTypeMemorySize = memorySize;
         this->mPushConstantsSize = size;
     }
diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp
index 600b6116c..48acd6014 100644
--- a/src/include/kompute/operations/OpAlgoDispatch.hpp
+++ b/src/include/kompute/operations/OpAlgoDispatch.hpp
@@ -25,8 +25,24 @@ class OpAlgoDispatch : public OpBase
      * @param algorithm The algorithm object to use for dispatch
      * @param pushConstants The push constants to use for override
      */
+    template<typename T = float>
     OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
-            const kp::Constants& pushConstants = {});
+            const std::vector<T>& pushConstants = {})
+    {
+        KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
+
+        this->mAlgorithm = algorithm;
+
+        if (pushConstants.size()) {
+            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
+            uint32_t size = pushConstants.size();
+            uint32_t totalSize = size * memorySize;
+            this->mPushConstantsData = malloc(totalSize);
+            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
+            this->mPushConstantsDataTypeMemorySize = memorySize;
+            this->mPushConstantsSize = size;
+        }
+    }
 
     /**
      * Default destructor, which is in charge of destroying the algorithm
@@ -63,7 +79,9 @@ class OpAlgoDispatch : public OpBase
 private:
     // -------------- ALWAYS OWNED RESOURCES
     std::shared_ptr<Algorithm> mAlgorithm;
-    Constants mPushConstants;
+    void* mPushConstantsData = nullptr;
+    uint32_t mPushConstantsDataTypeMemorySize = 0;
+    uint32_t mPushConstantsSize = 0;
 };
 
 } // End namespace kp
diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index 83b3d3d83..6d32fccaf 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -137,3 +137,226 @@ TEST(TestPushConstants, TestConstantsWrongSize)
         }
     }
 }
+
+// TODO: Ensure different types are considered for push constants
+// TEST(TestPushConstants, TestConstantsWrongType)
+// {
+//     {
+//         std::string shader(R"(
+//           #version 450
+//           layout(push_constant) uniform PushConstants {
+//             float x;
+//             float y;
+//             float z;
+//           } pcs;
+//           layout (local_size_x = 1) in;
+//           layout(set = 0, binding = 0) buffer a { float pa[]; };
+//           void main() {
+//               pa[0] += pcs.x;
+//               pa[1] += pcs.y;
+//               pa[2] += pcs.z;
+//           })");
+// 
+//         std::vector<uint32_t> spirv = compileSource(shader);
+// 
+//         std::shared_ptr<kp::Sequence> sq = nullptr;
+// 
+//         {
+//             kp::Manager mgr;
+// 
+//             std::shared_ptr<kp::TensorT<float>> tensor =
+//               mgr.tensor({ 0, 0, 0 });
+// 
+//             std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
+//               { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
+// 
+//             sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
+// 
+//             EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
+//                            algo, std::vector<uint32_t>{ 1, 2, 3 }),
+//                          std::runtime_error);
+//         }
+//     }
+// }
+
+TEST(TestPushConstants, TestConstantsMixedTypes)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            float x;
+            uint y;
+            int z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { float pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y - 2147483000;
+              pa[2] += pcs.z;
+          })");
+
+        struct Params{float x; uint32_t y; int32_t z;};
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<float>> tensor =
+              mgr.tensorT<float>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<float, Params>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 15.32, 2147483650, 10 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 30.32, 2147483650, -3 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<float>({ 45.64, 1300, 7 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsInt)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            int x;
+            int y;
+            int z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { int pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<int32_t>> tensor =
+              mgr.tensorT<int32_t>({ -1, -1, -1 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<int32_t , int32_t>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<int32_t>{{ -1, -1, -1 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<int32_t>({ -3, -3, -3 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsUnsignedInt)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            uint x;
+            uint y;
+            uint z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { uint pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<uint32_t>> tensor =
+              mgr.tensorT<uint32_t>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<uint32_t , uint32_t>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 2147483650, 2147483650, 2147483650 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<uint32_t>{{ 5, 5, 5 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<uint32_t>({ 2147483655, 2147483655, 2147483655 }));
+        }
+    }
+}
+
+TEST(TestPushConstants, TestConstantsDouble)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            double x;
+            double y;
+            double z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { double pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<double>> tensor =
+              mgr.tensorT<double>({ 0, 0, 0 });
+
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<double, double>(
+              { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
+
+            sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
+
+            // We need to run this in sequence to avoid race condition
+            // We can't use atomicAdd as swiftshader doesn't support it for
+            // float
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<double>{{ 1.1111222233334444, 2.1111222233334444, 3.1111222233334444 }});
+            sq->eval<kp::OpTensorSyncLocal>({ tensor });
+
+            EXPECT_EQ(tensor->vector(), std::vector<double>({ 2.2222444466668888, 4.2222444466668888, 6.2222444466668888 }));
+        }
+    }
+}

From 76fc7cd1c4a67f1cff38714b1c5fc320d11cbbef Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 11:32:19 +0100
Subject: [PATCH 05/19] Renamed test struct Params to TestConsts

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 test/TestPushConstant.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index 6d32fccaf..23d24c010 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -197,7 +197,7 @@ TEST(TestPushConstants, TestConstantsMixedTypes)
               pa[2] += pcs.z;
           })");
 
-        struct Params{float x; uint32_t y; int32_t z;};
+        struct TestConsts{float x; uint32_t y; int32_t z;};
 
         std::vector<uint32_t> spirv = compileSource(shader);
 
@@ -209,7 +209,7 @@ TEST(TestPushConstants, TestConstantsMixedTypes)
             std::shared_ptr<kp::TensorT<float>> tensor =
               mgr.tensorT<float>({ 0, 0, 0 });
 
-            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<float, Params>(
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm<float, TestConsts>(
               { tensor }, spirv, kp::Workgroup({ 1 }), {}, {{ 0, 0, 0 }});
 
             sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
@@ -217,8 +217,8 @@ TEST(TestPushConstants, TestConstantsMixedTypes)
             // We need to run this in sequence to avoid race condition
             // We can't use atomicAdd as swiftshader doesn't support it for
             // float
-            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 15.32, 2147483650, 10 }});
-            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<Params>{{ 30.32, 2147483650, -3 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<TestConsts>{{ 15.32, 2147483650, 10 }});
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<TestConsts>{{ 30.32, 2147483650, -3 }});
             sq->eval<kp::OpTensorSyncLocal>({ tensor });
 
             EXPECT_EQ(tensor->vector(), std::vector<float>({ 45.64, 1300, 7 }));

From a30b6c53dd9fd9c2f9741c074be143a20391f954 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 11:33:57 +0100
Subject: [PATCH 06/19] Formatted TestConsts struct declaration

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 test/TestPushConstant.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index 23d24c010..c885f1d87 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -197,7 +197,11 @@ TEST(TestPushConstants, TestConstantsMixedTypes)
               pa[2] += pcs.z;
           })");
 
-        struct TestConsts{float x; uint32_t y; int32_t z;};
+        struct TestConsts{
+            float x;
+            uint32_t y;
+            int32_t z;
+        };
 
         std::vector<uint32_t> spirv = compileSource(shader);
 

From ac0f30191b3cd7082b64092905c8241de5ca1336 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 11:50:09 +0100
Subject: [PATCH 07/19] Updated algorithm util test to use templated constant getters

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 test/TestMultipleAlgoExecutions.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index 7f63c208f..1fe6a6664 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -220,7 +220,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
     EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
-TEST(TestAlgoUtils, TestAlgorithmUtilFunctions)
+TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions)
 {
 
     kp::Manager mgr;
@@ -273,6 +273,6 @@ TEST(TestAlgoUtils, TestAlgorithmUtilFunctions)
                                    pushConsts);
 
     EXPECT_EQ(algorithm->getWorkgroup(), workgroup);
-    EXPECT_EQ(algorithm->getPush(), pushConsts);
-    EXPECT_EQ(algorithm->getSpecializationConstants(), specConsts);
+    EXPECT_EQ(algorithm->getPushConstants<float>(), pushConsts);
+    EXPECT_EQ(algorithm->getSpecializationConstants<float>(), specConsts);
 }

From 5ed26913011bcc665d75cf33b62168231e076033 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 12:11:33 +0100
Subject: [PATCH 08/19] Updated push const to validate same size params

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 single_include/kompute/Kompute.hpp | 11 ++++++-----
 src/include/kompute/Algorithm.hpp  | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 8cbfd404f..67efbe708 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1248,6 +1248,12 @@ class Algorithm
     {
         uint32_t memorySize = sizeof(decltype(pushConstants.back()));
         uint32_t size = pushConstants.size();
+
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
+
         uint32_t totalSize = memorySize * size;
         uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
 
@@ -1262,11 +1268,6 @@ class Algorithm
             free(this->mPushConstantsData);
         }
 
-        this->setPushConstants(pushConstants.data(), size, memorySize);
-    }
-
-    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
-        uint32_t totalSize = size * memorySize;
         this->mPushConstantsData = malloc(totalSize);
         memcpy(this->mPushConstantsData, data, totalSize);
         this->mPushConstantsDataTypeMemorySize = memorySize;
diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index a0b2ba146..9dd70b1b5 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -185,6 +185,12 @@ class Algorithm
     {
         uint32_t memorySize = sizeof(decltype(pushConstants.back()));
         uint32_t size = pushConstants.size();
+
+        this->setPushConstants(pushConstants.data(), size, memorySize);
+    }
+
+    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
+
         uint32_t totalSize = memorySize * size;
         uint32_t previousTotalSize = this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
 
@@ -199,11 +205,6 @@ class Algorithm
             free(this->mPushConstantsData);
         }
 
-        this->setPushConstants(pushConstants.data(), size, memorySize);
-    }
-
-    void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
-        uint32_t totalSize = size * memorySize;
         this->mPushConstantsData = malloc(totalSize);
         memcpy(this->mPushConstantsData, data, totalSize);
         this->mPushConstantsDataTypeMemorySize = memorySize;

From 5193975e5c299d397af4dbe3f60da8f912697c73 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 12:22:42 +0100
Subject: [PATCH 09/19] Fixed test filter to exclude TestPushConstants.TestConstantsDouble

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0f23f75ec..64ae11155 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsyst
 VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake"
 
 # These are the tests that don't work with swiftshader but can be run directly with vulkan
-FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants:TestConstantsDouble"
+FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps:TestPushConstants.TestConstantsDouble"
 
 ifeq ($(OS),Windows_NT)     # is Windows_NT on XP, 2000, 7, Vista, 10...
 	CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"

From 6113d286a9177b41e012f3a291651ff06a0f2607 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 13:13:20 +0100
Subject: [PATCH 10/19] Removed get_spec_consts python binding and added spec constants test

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 python/src/main.cpp                 |  1 -
 test/TestSpecializationConstant.cpp | 48 +++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index 846576adb..43c369555 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -61,7 +61,6 @@ PYBIND11_MODULE(kp, m) {
     py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm", DOC(kp, Algorithm, Algorithm))
         .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors))
         .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy))
-        .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants))
         .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit));
 
     py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index 15da143a0..f57c221ab 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -53,3 +53,51 @@ TEST(TestSpecializationConstants, TestTwoConstants)
         }
     }
 }
+
+TEST(TestSpecializationConstants, TestConstantsInt)
+{
+    {
+        std::string shader(R"(
+          #version 450
+          layout (constant_id = 0) const float cOne = 1;
+          layout (constant_id = 1) const float cTwo = 1;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { float pa[]; };
+          layout(set = 0, binding = 1) buffer b { float pb[]; };
+          void main() {
+              uint index = gl_GlobalInvocationID.x;
+              pa[index] = cOne;
+              pb[index] = cTwo;
+          })");
+
+        std::vector<uint32_t> spirv = compileSource(shader);
+
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            std::shared_ptr<kp::TensorT<float>> tensorA =
+              mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensorB =
+              mgr.tensor({ 0, 0, 0 });
+
+            std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
+                                                                tensorB };
+
+            kp::Constants spec = kp::Constants({ 5.0, 0.3 });
+
+            std::shared_ptr<kp::Algorithm> algo =
+              mgr.algorithm(params, spirv, {}, spec);
+
+            sq = mgr.sequence()
+                   ->record<kp::OpTensorSyncDevice>(params)
+                   ->record<kp::OpAlgoDispatch>(algo)
+                   ->record<kp::OpTensorSyncLocal>(params)
+                   ->eval();
+
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 5, 5, 5 }));
+            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 0.3, 0.3, 0.3 }));
+        }
+    }
+}

From 1972f2c8f8a10d5656574e0d9f8123edfd3b50ea Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 13:57:54 +0100
Subject: [PATCH 11/19] Updated python build

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 python/src/main.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 96 insertions(+), 4 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index 43c369555..b0ef31191 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -178,8 +178,8 @@ PYBIND11_MODULE(kp, m) {
                              const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
                              const py::bytes& spirv,
                              const kp::Workgroup& workgroup,
-                             const kp::Constants& spec_consts,
-                             const kp::Constants& push_consts) {
+                             const std::vector<float>& spec_consts,
+                             const std::vector<float>& push_consts) {
                     py::buffer_info info(py::buffer(spirv).request());
                     const char *data = reinterpret_cast<const char *>(info.ptr);
                     size_t length = static_cast<size_t>(info.size);
@@ -190,8 +190,100 @@ PYBIND11_MODULE(kp, m) {
             py::arg("tensors"),
             py::arg("spirv"),
             py::arg("workgroup") = kp::Workgroup(),
-            py::arg("spec_consts") = kp::Constants(),
-            py::arg("push_consts") = kp::Constants())
+            py::arg("spec_consts") = std::vector<float>(),
+            py::arg("push_consts") = std::vector<float>())
+        .def("algorithm_t", [np](kp::Manager& self,
+                             const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
+                             const py::bytes& spirv,
+                             const kp::Workgroup& workgroup,
+                             const py::array& spec_consts,
+                             const py::array& push_consts) {
+                // Creates an Algorithm whose constants keep the element type of the numpy arrays.
+                py::buffer_info info(py::buffer(spirv).request());
+                const char *data = reinterpret_cast<const char *>(info.ptr);
+                size_t length = static_cast<size_t>(info.size);
+                std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
+                // Buffer views over both arrays; their contents are copied into typed vectors below.
+                const py::buffer_info pushInfo        = push_consts.request();
+                const py::buffer_info specInfo        = spec_consts.request();
+                // NOTE(review): the pointer-range copies below assume contiguous arrays - confirm.
+                KP_LOG_DEBUG("Kompute Python Manager creating Algorithm_T with "
+                        "push consts data size {} dtype {} and spec const data size {} dtype {}",
+                        push_consts.size(), std::string(py::str(push_consts.dtype())),
+                        spec_consts.size(), std::string(py::str(spec_consts.dtype())));
+
+                // algorithm() is templated on both element types, so each supported (spec, push) dtype pair is dispatched explicitly.
+                if (spec_consts.dtype() == py::dtype::of<std::float_t>()) {
+                    std::vector<float> specConstsVec((float*)specInfo.ptr, ((float*)specInfo.ptr) + specInfo.size);
+                    if (push_consts.dtype() == py::dtype::of<std::float_t>()) {
+                        std::vector<float> pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
+                        std::vector<int32_t> pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+                        std::vector<uint32_t> pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::double_t>()) {
+                        std::vector<double> pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    }
+                } else if (spec_consts.dtype() == py::dtype::of<std::int32_t>()) {
+                    std::vector<int32_t> specConstsVec((int32_t*)specInfo.ptr, ((int32_t*)specInfo.ptr) + specInfo.size);
+                    if (push_consts.dtype() == py::dtype::of<std::float_t>()) {
+                        std::vector<float> pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
+                        std::vector<int32_t> pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+                        std::vector<uint32_t> pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::double_t>()) {
+                        std::vector<double> pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    }
+                } else if (spec_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+                    std::vector<uint32_t> specConstsVec((uint32_t*)specInfo.ptr, ((uint32_t*)specInfo.ptr) + specInfo.size);
+                    if (push_consts.dtype() == py::dtype::of<std::float_t>()) {
+                        std::vector<float> pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
+                        std::vector<int32_t> pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+                        std::vector<uint32_t> pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::double_t>()) {
+                        std::vector<double> pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    }
+                } else if (spec_consts.dtype() == py::dtype::of<std::double_t>()) {
+                    std::vector<double> specConstsVec((double*)specInfo.ptr, ((double*)specInfo.ptr) + specInfo.size);
+                    if (push_consts.dtype() == py::dtype::of<std::float_t>()) {
+                        std::vector<float> pushConstsVec((float*)pushInfo.ptr, ((float*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
+                        std::vector<int32_t> pushConstsVec((int32_t*)pushInfo.ptr, ((int32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+                        std::vector<uint32_t> pushConstsVec((uint32_t*)pushInfo.ptr, ((uint32_t*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    } else if (push_consts.dtype() == py::dtype::of<std::double_t>()) {
+                        std::vector<double> pushConstsVec((double*)pushInfo.ptr, ((double*)pushInfo.ptr) + pushInfo.size);
+                        return self.algorithm(tensors, spirvVec, workgroup, specConstsVec, pushConstsVec);
+                    }
+                }
+                // Reached when either dtype is unsupported; throwing unconditionally also
+                // ensures no path falls off the end of the lambda without returning a value.
+                throw std::runtime_error("Kompute Python no valid dtype supported");
+            },
+            DOC(kp, Manager, algorithm),
+            py::arg("tensors"),
+            py::arg("spirv"),
+            py::arg("workgroup") = kp::Workgroup(),
+            py::arg("spec_consts") = std::vector<float>(),
+            py::arg("push_consts") = std::vector<float>())
         .def("list_devices", [](kp::Manager& self){
             const std::vector<vk::PhysicalDevice> devices = self.listDevices();
             py::list list;

From 3d320ff687a29b0165a431287958c06a9bc8a641 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 14:07:20 +0100
Subject: [PATCH 12/19] Updating python

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 single_include/kompute/Kompute.hpp | 520 -----------------------------
 src/OpAlgoDispatch.cpp             |   1 +
 2 files changed, 1 insertion(+), 520 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 67efbe708..b202ab580 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1793,523 +1793,3 @@ class OpMult : public OpAlgoDispatch
 };
 
 } // End namespace kp
-
-// SPDX-License-Identifier: Apache-2.0
-
-namespace kp {
-
-/**
- *  Container of operations that can be sent to GPU as batch
- */
-class Sequence : public std::enable_shared_from_this<Sequence>
-{
-  public:
-    /**
-     * Main constructor for sequence which requires core vulkan components to
-     * generate all dependent resources.
-     *
-     * @param physicalDevice Vulkan physical device
-     * @param device Vulkan logical device
-     * @param computeQueue Vulkan compute queue
-     * @param queueIndex Vulkan compute queue index in device
-     * @param totalTimestamps Maximum number of timestamps to allocate
-     */
-    Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-             std::shared_ptr<vk::Device> device,
-             std::shared_ptr<vk::Queue> computeQueue,
-             uint32_t queueIndex,
-             uint32_t totalTimestamps = 0);
-    /**
-     * Destructor for sequence which is responsible for cleaning all subsequent
-     * owned operations.
-     */
-    ~Sequence();
-
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param op Object derived from kp::BaseOp that will be recoreded by the
-     * sequence which will be used when the operation is evaluated.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
-
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> record(
-      std::vector<std::shared_ptr<Tensor>> tensors,
-      TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->record(op);
-    }
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
-                                     TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->record(op);
-    }
-
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job synchronously (with a barrier).
-     *
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> eval();
-
-    /**
-     * Resets all the recorded and stored operations, records the operation
-     * provided and submits into the gpu as a submit job synchronously (with a
-     * barrier).
-     *
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
-
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
-                                   TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->eval(op);
-    }
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
-                                   TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->eval(op);
-    }
-
-    /**
-     * Eval Async sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job without a barrier. EvalAwait()
-     * must ALWAYS be called after to ensure the sequence is terminated
-     * correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync();
-    /**
-     * Clears currnet operations to record provided one in the vector of
-     * operations into the gpu as a submit job without a barrier. EvalAwait()
-     * must ALWAYS be called after to ensure the sequence is terminated
-     * correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> evalAsync(
-      std::vector<std::shared_ptr<Tensor>> tensors,
-      TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->evalAsync(op);
-    }
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
-                                        TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->evalAsync(op);
-    }
-
-    /**
-     * Eval Await waits for the fence to finish processing and then once it
-     * finishes, it runs the postEval of all operations.
-     *
-     * @param waitFor Number of milliseconds to wait before timing out.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
-
-    /**
-     * Clear function clears all operations currently recorded and starts
-     * recording again.
-     */
-    void clear();
-
-    /**
-     * Return the timestamps that were latched at the beginning and
-     * after each operation during the last eval() call.
-     */
-    std::vector<std::uint64_t> getTimestamps();
-
-    /**
-     * Begins recording commands for commands to be submitted into the command
-     * buffer.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    void begin();
-
-    /**
-     * Ends the recording and stops recording commands when the record command
-     * is sent.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    void end();
-
-    /**
-     * Returns true if the sequence is currently in recording activated.
-     *
-     * @return Boolean stating if recording ongoing.
-     */
-    bool isRecording();
-
-    /**
-     * Returns true if the sequence has been initialised, and it's based on the
-     * GPU resources being refrenced.
-     *
-     * @return Boolean stating if is initialized
-     */
-    bool isInit();
-
-    /**
-     * Clears command buffer and triggers re-record of all the current
-     * operations saved, which is useful if the underlying kp::Tensors or
-     * kp::Algorithms are modified and need to be re-recorded.
-     */
-    void rerecord();
-
-    /**
-     * Returns true if the sequence is currently running - mostly used for async
-     * workloads.
-     *
-     * @return Boolean stating if currently running.
-     */
-    bool isRunning();
-
-    /**
-     * Destroys and frees the GPU resources which include the buffer and memory
-     * and sets the sequence as init=False.
-     */
-    void destroy();
-
-  private:
-    // -------------- NEVER OWNED RESOURCES
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    std::shared_ptr<vk::Device> mDevice = nullptr;
-    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
-    uint32_t mQueueIndex = -1;
-
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
-    bool mFreeCommandPool = false;
-    std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
-    bool mFreeCommandBuffer = false;
-
-    // -------------- ALWAYS OWNED RESOURCES
-    vk::Fence mFence;
-    std::vector<std::shared_ptr<OpBase>> mOperations;
-    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
-
-    // State
-    bool mRecording = false;
-    bool mIsRunning = false;
-
-    // Create functions
-    void createCommandPool();
-    void createCommandBuffer();
-    void createTimestampQueryPool(uint32_t totalTimestamps);
-};
-
-} // End namespace kp
-
-// SPDX-License-Identifier: Apache-2.0
-
-#include <set>
-#include <unordered_map>
-
-#define KP_DEFAULT_SESSION "DEFAULT"
-
-namespace kp {
-
-/**
-    Base orchestrator which creates and manages device and child components
-*/
-class Manager
-{
-  public:
-    /**
-        Base constructor and default used which creates the base resources
-       including choosing the device 0 by default.
-    */
-    Manager();
-
-    /**
-     * Similar to base constructor but allows for further configuration to use
-     * when creating the Vulkan resources.
-     *
-     * @param physicalDeviceIndex The index of the physical device to use
-     * @param familyQueueIndices (Optional) List of queue indices to add for
-     * explicit allocation
-     * @param desiredExtensions The desired extensions to load from
-     * physicalDevice
-     */
-    Manager(uint32_t physicalDeviceIndex,
-            const std::vector<uint32_t>& familyQueueIndices = {},
-            const std::vector<std::string>& desiredExtensions = {});
-
-    /**
-     * Manager constructor which allows your own vulkan application to integrate
-     * with the kompute use.
-     *
-     * @param instance Vulkan compute instance to base this application
-     * @param physicalDevice Vulkan physical device to use for application
-     * @param device Vulkan logical device to use for all base resources
-     * @param physicalDeviceIndex Index for vulkan physical device used
-     */
-    Manager(std::shared_ptr<vk::Instance> instance,
-            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-            std::shared_ptr<vk::Device> device);
-
-    /**
-     * Manager destructor which would ensure all owned resources are destroyed
-     * unless explicitly stated that resources should not be destroyed or freed.
-     */
-    ~Manager();
-
-    /**
-     * Create a managed sequence that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param queueIndex The queue to use from the available queues
-     * @param nrOfTimestamps The maximum number of timestamps to allocate.
-     * If zero (default), disables latching of timestamps.
-     * @returns Shared pointer with initialised sequence
-     */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
-                                       uint32_t totalTimestamps = 0);
-
-    /**
-     * Create a managed tensor that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param data The data to initialize the tensor with
-     * @param tensorType The type of tensor to initialize
-     * @returns Shared pointer with initialised tensor
-     */
-    template<typename T>
-    std::shared_ptr<TensorT<T>> tensorT(
-      const std::vector<T>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
-
-        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
-          this->mPhysicalDevice, this->mDevice, data, tensorType) };
-
-        if (this->mManageResources) {
-            this->mManagedTensors.push_back(tensor);
-        }
-
-        return tensor;
-    }
-
-    std::shared_ptr<TensorT<float>> tensor(
-      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        return this->tensorT<float>(data, tensorType);
-    }
-
-    std::shared_ptr<Tensor> tensor(
-      void* data,
-      uint32_t elementTotalCount,
-      uint32_t elementMemorySize,
-      const Tensor::TensorDataTypes& dataType,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
-                                                       this->mDevice,
-                                                       data,
-                                                       elementTotalCount,
-                                                       elementMemorySize,
-                                                       dataType,
-                                                       tensorType) };
-
-        if (this->mManageResources) {
-            this->mManagedTensors.push_back(tensor);
-        }
-
-        return tensor;
-    }
-
-    std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-      const std::vector<uint32_t>& spirv = {},
-      const Workgroup& workgroup = {},
-      const std::vector<float>& specializationConstants = {},
-      const std::vector<float>& pushConstants = {})
-    {
-        return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
-    }
-
-    /**
-     * Create a managed algorithm that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param tensors (optional) The tensors to initialise the algorithm with
-     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
-     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
-     * defaults to (tensor[0].size(), 1, 1)
-     * @param specializationConstants (optional) kp::Constant to use for
-     * specialization constants, and defaults to an empty constant
-     * @param pushConstants (optional) kp::Constant to use for push constants,
-     * and defaults to an empty constant
-     * @returns Shared pointer with initialised algorithm
-     */
-    template<typename S = float, typename P = float>
-    std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors,
-      const std::vector<uint32_t>& spirv,
-      const Workgroup& workgroup,
-      const std::vector<S>& specializationConstants,
-      const std::vector<P>& pushConstants)
-    {
-
-        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
-
-        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
-          this->mDevice,
-          tensors,
-          spirv,
-          workgroup,
-          specializationConstants,
-          pushConstants) };
-
-        if (this->mManageResources) {
-            this->mManagedAlgorithms.push_back(algorithm);
-        }
-
-        return algorithm;
-    }
-
-    /**
-     * Destroy the GPU resources and all managed resources by manager.
-     **/
-    void destroy();
-    /**
-     * Run a pseudo-garbage collection to release all the managed resources
-     * that have been already freed due to these reaching to zero ref count.
-     **/
-    void clear();
-
-    /**
-     * Information about the current device.
-     *
-     * @return vk::PhysicalDeviceProperties containing information about the device
-     **/
-    vk::PhysicalDeviceProperties getDeviceProperties() const;
-
-    /**
-     * List the devices available in the current vulkan instance.
-     *
-     * @return vector of physical devices containing their respective properties
-     **/
-    std::vector<vk::PhysicalDevice> listDevices() const;
-
-  private:
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::Instance> mInstance = nullptr;
-    bool mFreeInstance = false;
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    std::shared_ptr<vk::Device> mDevice = nullptr;
-    bool mFreeDevice = false;
-
-    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<std::weak_ptr<Tensor>> mManagedTensors;
-    std::vector<std::weak_ptr<Sequence>> mManagedSequences;
-    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
-
-    std::vector<uint32_t> mComputeQueueFamilyIndices;
-    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
-
-    bool mManageResources = false;
-
-#if DEBUG
-#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
-    vk::DebugReportCallbackEXT mDebugReportCallback;
-    vk::DispatchLoaderDynamic mDebugDispatcher;
-#endif
-#endif
-
-    // Create functions
-    void createInstance();
-    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
-                      uint32_t hysicalDeviceIndex = 0,
-                      const std::vector<std::string>& desiredExtensions = {});
-};
-
-} // End namespace kp
diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp
index c6099ff85..88d6e55fb 100644
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@@ -10,6 +10,7 @@ OpAlgoDispatch::~OpAlgoDispatch()
     KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
 
     if (this->mPushConstantsData) {
+        KP_LOG_DEBUG("Kompute freeing push constants data");
         free(this->mPushConstantsData);
     }
 }

From 559b83e07fafa13bb672b760857c4f56a17e9873 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 14:11:57 +0100
Subject: [PATCH 13/19] Fixing deleted sequence from header

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 single_include/kompute/Kompute.hpp | 520 +++++++++++++++++++++++++++++
 1 file changed, 520 insertions(+)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index b202ab580..67efbe708 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1793,3 +1793,523 @@ class OpMult : public OpAlgoDispatch
 };
 
 } // End namespace kp
+
+// SPDX-License-Identifier: Apache-2.0
+
+namespace kp {
+
+/**
+ *  Container of operations that can be sent to GPU as batch
+ */
+class Sequence : public std::enable_shared_from_this<Sequence>
+{
+  public:
+    /**
+     * Main constructor for sequence which requires core vulkan components to
+     * generate all dependent resources.
+     *
+     * @param physicalDevice Vulkan physical device
+     * @param device Vulkan logical device
+     * @param computeQueue Vulkan compute queue
+     * @param queueIndex Vulkan compute queue index in device
+     * @param totalTimestamps Maximum number of timestamps to allocate
+     */
+    Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+             std::shared_ptr<vk::Device> device,
+             std::shared_ptr<vk::Queue> computeQueue,
+             uint32_t queueIndex,
+             uint32_t totalTimestamps = 0);
+    /**
+     * Destructor for sequence which is responsible for cleaning all subsequent
+     * owned operations.
+     */
+    ~Sequence();
+
+    /**
+     * Record function for operation to be added to the GPU queue in batch. This
+     * template requires classes to be derived from the OpBase class. This
+     * function also requires the Sequence to be recording, otherwise it will
+     * not be able to add the operation.
+     *
+     * @param op Object derived from kp::OpBase that will be recorded by the
+     * sequence which will be used when the operation is evaluated.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
+
+    /**
+     * Record function for operation to be added to the GPU queue in batch. This
+     * template requires classes to be derived from the OpBase class. This
+     * function also requires the Sequence to be recording, otherwise it will
+     * not be able to add the operation.
+     *
+     * @param tensors Vector of tensors to use for the operation
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> record(
+      std::vector<std::shared_ptr<Tensor>> tensors,
+      TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
+        return this->record(op);
+    }
+    /**
+     * Record function for operation to be added to the GPU queue in batch. This
+     * template requires classes to be derived from the OpBase class. This
+     * function also requires the Sequence to be recording, otherwise it will
+     * not be able to add the operation.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
+                                     TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
+        return this->record(op);
+    }
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job synchronously (with a barrier).
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> eval();
+
+    /**
+     * Resets all the recorded and stored operations, records the operation
+     * provided and submits into the gpu as a submit job synchronously (with a
+     * barrier).
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @param tensors Vector of tensors to use for the operation
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
+                                   TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
+        return this->eval(op);
+    }
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
+                                   TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
+        return this->eval(op);
+    }
+
+    /**
+     * Eval Async sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job without a barrier. EvalAwait()
+     * must ALWAYS be called after to ensure the sequence is terminated
+     * correctly.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> evalAsync();
+    /**
+     * Clears current operations to record provided one in the vector of
+     * operations into the gpu as a submit job without a barrier. EvalAwait()
+     * must ALWAYS be called after to ensure the sequence is terminated
+     * correctly.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @param tensors Vector of tensors to use for the operation
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> evalAsync(
+      std::vector<std::shared_ptr<Tensor>> tensors,
+      TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
+        return this->evalAsync(op);
+    }
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
+                                        TArgs&&... params)
+    {
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
+        return this->evalAsync(op);
+    }
+
+    /**
+     * Eval Await waits for the fence to finish processing and then once it
+     * finishes, it runs the postEval of all operations.
+     *
+     * @param waitFor Number of milliseconds to wait before timing out.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
+
+    /**
+     * Clear function clears all operations currently recorded and starts
+     * recording again.
+     */
+    void clear();
+
+    /**
+     * Return the timestamps that were latched at the beginning and
+     * after each operation during the last eval() call.
+     */
+    std::vector<std::uint64_t> getTimestamps();
+
+    /**
+     * Begins recording commands for commands to be submitted into the command
+     * buffer.
+     *
+     * This function does not return a value.
+     */
+    void begin();
+
+    /**
+     * Ends the recording and stops recording commands when the record command
+     * is sent.
+     *
+     * This function does not return a value.
+     */
+    void end();
+
+    /**
+     * Returns true if the sequence is currently in recording activated.
+     *
+     * @return Boolean stating if recording ongoing.
+     */
+    bool isRecording();
+
+    /**
+     * Returns true if the sequence has been initialised, and it's based on the
+     * GPU resources being referenced.
+     *
+     * @return Boolean stating if is initialized
+     */
+    bool isInit();
+
+    /**
+     * Clears command buffer and triggers re-record of all the current
+     * operations saved, which is useful if the underlying kp::Tensors or
+     * kp::Algorithms are modified and need to be re-recorded.
+     */
+    void rerecord();
+
+    /**
+     * Returns true if the sequence is currently running - mostly used for async
+     * workloads.
+     *
+     * @return Boolean stating if currently running.
+     */
+    bool isRunning();
+
+    /**
+     * Destroys and frees the GPU resources which include the buffer and memory
+     * and sets the sequence as init=False.
+     */
+    void destroy();
+
+  private:
+    // -------------- NEVER OWNED RESOURCES
+    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
+    std::shared_ptr<vk::Device> mDevice = nullptr;
+    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
+    uint32_t mQueueIndex = -1;
+
+    // -------------- OPTIONALLY OWNED RESOURCES
+    std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
+    bool mFreeCommandPool = false;
+    std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
+    bool mFreeCommandBuffer = false;
+
+    // -------------- ALWAYS OWNED RESOURCES
+    vk::Fence mFence;
+    std::vector<std::shared_ptr<OpBase>> mOperations;
+    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
+
+    // State
+    bool mRecording = false;
+    bool mIsRunning = false;
+
+    // Create functions
+    void createCommandPool();
+    void createCommandBuffer();
+    void createTimestampQueryPool(uint32_t totalTimestamps);
+};
+
+} // End namespace kp
+
+// SPDX-License-Identifier: Apache-2.0
+
+#include <set>
+#include <unordered_map>
+
+#define KP_DEFAULT_SESSION "DEFAULT"
+
+namespace kp {
+
+/**
+    Base orchestrator which creates and manages device and child components
+*/
+class Manager
+{
+  public:
+    /**
+        Base constructor and default used which creates the base resources
+       including choosing the device 0 by default.
+    */
+    Manager();
+
+    /**
+     * Similar to base constructor but allows for further configuration to use
+     * when creating the Vulkan resources.
+     *
+     * @param physicalDeviceIndex The index of the physical device to use
+     * @param familyQueueIndices (Optional) List of queue indices to add for
+     * explicit allocation
+     * @param desiredExtensions The desired extensions to load from
+     * physicalDevice
+     */
+    Manager(uint32_t physicalDeviceIndex,
+            const std::vector<uint32_t>& familyQueueIndices = {},
+            const std::vector<std::string>& desiredExtensions = {});
+
+    /**
+     * Manager constructor which allows your own vulkan application to integrate
+     * with the kompute use.
+     *
+     * @param instance Vulkan compute instance to base this application
+     * @param physicalDevice Vulkan physical device to use for application
+     * @param device Vulkan logical device to use for all base resources
+     * @param physicalDeviceIndex Index for vulkan physical device used
+     */
+    Manager(std::shared_ptr<vk::Instance> instance,
+            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+            std::shared_ptr<vk::Device> device);
+
+    /**
+     * Manager destructor which would ensure all owned resources are destroyed
+     * unless explicitly stated that resources should not be destroyed or freed.
+     */
+    ~Manager();
+
+    /**
+     * Create a managed sequence that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
+     *
+     * @param queueIndex The queue to use from the available queues
+     * @param totalTimestamps The maximum number of timestamps to allocate.
+     * If zero (default), disables latching of timestamps.
+     * @returns Shared pointer with initialised sequence
+     */
+    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
+                                       uint32_t totalTimestamps = 0);
+
+    /**
+     * Create a managed tensor that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
+     *
+     * @param data The data to initialize the tensor with
+     * @param tensorType The type of tensor to initialize
+     * @returns Shared pointer with initialised tensor
+     */
+    template<typename T>
+    std::shared_ptr<TensorT<T>> tensorT(
+      const std::vector<T>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
+
+        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
+          this->mPhysicalDevice, this->mDevice, data, tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
+
+    std::shared_ptr<TensorT<float>> tensor(
+      const std::vector<float>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        return this->tensorT<float>(data, tensorType);
+    }
+
+    std::shared_ptr<Tensor> tensor(
+      void* data,
+      uint32_t elementTotalCount,
+      uint32_t elementMemorySize,
+      const Tensor::TensorDataTypes& dataType,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
+                                                       this->mDevice,
+                                                       data,
+                                                       elementTotalCount,
+                                                       elementMemorySize,
+                                                       dataType,
+                                                       tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
+
+    std::shared_ptr<Algorithm> algorithm(
+      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+      const std::vector<uint32_t>& spirv = {},
+      const Workgroup& workgroup = {},
+      const std::vector<float>& specializationConstants = {},
+      const std::vector<float>& pushConstants = {})
+    {
+        return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
+    }
+
+    /**
+     * Create a managed algorithm that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
+     *
+     * @param tensors (optional) The tensors to initialise the algorithm with
+     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
+     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
+     * defaults to (tensor[0].size(), 1, 1)
+     * @param specializationConstants (optional) kp::Constant to use for
+     * specialization constants, and defaults to an empty constant
+     * @param pushConstants (optional) kp::Constant to use for push constants,
+     * and defaults to an empty constant
+     * @returns Shared pointer with initialised algorithm
+     */
+    template<typename S = float, typename P = float>
+    std::shared_ptr<Algorithm> algorithm(
+      const std::vector<std::shared_ptr<Tensor>>& tensors,
+      const std::vector<uint32_t>& spirv,
+      const Workgroup& workgroup,
+      const std::vector<S>& specializationConstants,
+      const std::vector<P>& pushConstants)
+    {
+
+        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
+
+        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
+          this->mDevice,
+          tensors,
+          spirv,
+          workgroup,
+          specializationConstants,
+          pushConstants) };
+
+        if (this->mManageResources) {
+            this->mManagedAlgorithms.push_back(algorithm);
+        }
+
+        return algorithm;
+    }
+
+    /**
+     * Destroy the GPU resources and all managed resources by manager.
+     **/
+    void destroy();
+    /**
+     * Run a pseudo-garbage collection to release all the managed resources
+     * that have been already freed due to these reaching to zero ref count.
+     **/
+    void clear();
+
+    /**
+     * Information about the current device.
+     *
+     * @return vk::PhysicalDeviceProperties containing information about the device
+     **/
+    vk::PhysicalDeviceProperties getDeviceProperties() const;
+
+    /**
+     * List the devices available in the current vulkan instance.
+     *
+     * @return vector of physical devices containing their respective properties
+     **/
+    std::vector<vk::PhysicalDevice> listDevices() const;
+
+  private:
+    // -------------- OPTIONALLY OWNED RESOURCES
+    std::shared_ptr<vk::Instance> mInstance = nullptr;
+    bool mFreeInstance = false;
+    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
+    std::shared_ptr<vk::Device> mDevice = nullptr;
+    bool mFreeDevice = false;
+
+    // -------------- ALWAYS OWNED RESOURCES
+    std::vector<std::weak_ptr<Tensor>> mManagedTensors;
+    std::vector<std::weak_ptr<Sequence>> mManagedSequences;
+    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
+
+    std::vector<uint32_t> mComputeQueueFamilyIndices;
+    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
+
+    bool mManageResources = false;
+
+#if DEBUG
+#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
+    vk::DebugReportCallbackEXT mDebugReportCallback;
+    vk::DispatchLoaderDynamic mDebugDispatcher;
+#endif
+#endif
+
+    // Create functions
+    void createInstance();
+    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
+                      uint32_t hysicalDeviceIndex = 0,
+                      const std::vector<std::string>& desiredExtensions = {});
+};
+
+} // End namespace kp

From b9e40d50285ac3192b26e1ed6859c6241b7eb09e Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 14:21:33 +0100
Subject: [PATCH 14/19] Updated algo to align with memory management

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 src/Algorithm.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index 9179cffbd..6caccf9bb 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -23,13 +23,13 @@ Algorithm::isInit()
 void
 Algorithm::destroy()
 {
-    if (this->mPushConstantsData) {
-        free(this->mPushConstantsData);
-    }
-
-    if (this->mSpecializationConstantsData) {
-        free(this->mSpecializationConstantsData);
-    }
+    // We don't have to free memory on destroy as it's freed by the commandBuffer destructor
+    // if (this->mPushConstantsData) {
+    //     free(this->mPushConstantsData);
+    // }
+    // if (this->mSpecializationConstantsData) {
+    //     free(this->mSpecializationConstantsData);
+    // }
 
     if (!this->mDevice) {
         KP_LOG_WARN("Kompute Algorithm destroy function reached with null "

From 2d4c2f733371afb4875b712cbd3f8e04ef33da05 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 15:00:07 +0100
Subject: [PATCH 15/19] Fully functional python

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 python/src/main.cpp         | 30 ++++++++++++++++++++++++++-
 python/test/test_kompute.py | 41 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index b0ef31191..82a4bff5e 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -14,6 +14,31 @@ namespace py = pybind11;
 //used in Core.hpp
 py::object kp_debug, kp_info, kp_warning, kp_error;
 
+// Builds an OpAlgoDispatch from a numpy array of push constants: the array data is
+// copied into a std::vector of the matching element type (throws on any other dtype).
+std::unique_ptr<kp::OpAlgoDispatch> opAlgoDispatchPyInit(
+    std::shared_ptr<kp::Algorithm>& algorithm, const py::array& push_consts) {
+    const py::buffer_info info        = push_consts.request();
+    KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with push_consts size {} dtype {}",
+            push_consts.size(), std::string(py::str(push_consts.dtype())));
+    // Compare against plain float/double: std::float_t/std::double_t may alias wider types.
+    if (push_consts.dtype() == py::dtype::of<float>()) {
+        std::vector<float> dataVec(static_cast<float*>(info.ptr), static_cast<float*>(info.ptr) + info.size);
+        return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
+    } else if (push_consts.dtype() == py::dtype::of<std::uint32_t>()) {
+        std::vector<uint32_t> dataVec(static_cast<uint32_t*>(info.ptr), static_cast<uint32_t*>(info.ptr) + info.size);
+        return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
+    } else if (push_consts.dtype() == py::dtype::of<std::int32_t>()) {
+        std::vector<int32_t> dataVec(static_cast<int32_t*>(info.ptr), static_cast<int32_t*>(info.ptr) + info.size);
+        return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
+    } else if (push_consts.dtype() == py::dtype::of<double>()) {
+        std::vector<double> dataVec(static_cast<double*>(info.ptr), static_cast<double*>(info.ptr) + info.size);
+        return std::unique_ptr<kp::OpAlgoDispatch>{new kp::OpAlgoDispatch(algorithm, dataVec)};
+    } else {
+        throw std::runtime_error("Kompute Python no valid dtype supported");
+    }
+}
+
 PYBIND11_MODULE(kp, m) {
 
     // The logging modules are used in the Kompute.hpp file
@@ -51,7 +76,10 @@ PYBIND11_MODULE(kp, m) {
             m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
         .def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
                 DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
-                py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
+                py::arg("algorithm"), py::arg("push_consts") = kp::Constants())
+        .def(py::init(&opAlgoDispatchPyInit),
+                DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
+                py::arg("algorithm"), py::arg("push_consts"));
 
     py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(
             m, "OpMult", py::base<kp::OpBase>(), DOC(kp, OpMult))
diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py
index eaf6b28db..385933f26 100644
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@@ -197,10 +197,49 @@ def test_pushconsts():
         .record(kp.OpTensorSyncDevice([tensor]))
         .record(kp.OpAlgoDispatch(algo))
         .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
+        .record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
         .record(kp.OpTensorSyncLocal([tensor]))
         .eval())
 
-    assert np.all(tensor.data() == np.array([0.4, 0.4, 0.4], dtype=np.float32))
+    assert np.allclose(tensor.data(), np.array([0.7, 0.6, 0.5], dtype=np.float32))
+
+
+def test_pushconsts_int():
+
+    spirv = compile_source("""
+          #version 450
+          layout(push_constant) uniform PushConstants {
+            int x;
+            int  y;
+            int  z;
+          } pcs;
+          layout (local_size_x = 1) in;
+          layout(set = 0, binding = 0) buffer a { int  pa[]; };
+          void main() {
+              pa[0] += pcs.x;
+              pa[1] += pcs.y;
+              pa[2] += pcs.z;
+          }
+    """)
+
+    mgr = kp.Manager()
+
+    tensor = mgr.tensor_t(np.array([0, 0, 0], dtype=np.int32))
+
+    spec_consts = np.array([], dtype=np.int32)
+    push_consts = np.array([-1, -1, -1], dtype=np.int32)
+
+    algo = mgr.algorithm_t([tensor], spirv, (1, 1, 1), spec_consts, push_consts)
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice([tensor]))
+        .record(kp.OpAlgoDispatch(algo))
+        .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32)))
+        .record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32)))
+        .record(kp.OpTensorSyncLocal([tensor]))
+        .eval())
+
+    assert np.all(tensor.data() == np.array([-3, -3, -3], dtype=np.int32))
 
 
 def test_workgroup():

From 1b2f42c3aca71bbfdf2503c228f348fe2cdb01db Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 15:18:11 +0100
Subject: [PATCH 16/19] Added specconst test for int

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 test/TestSpecializationConstant.cpp | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index f57c221ab..a1dd5587d 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -59,11 +59,11 @@ TEST(TestSpecializationConstants, TestConstantsInt)
     {
         std::string shader(R"(
           #version 450
-          layout (constant_id = 0) const float cOne = 1;
-          layout (constant_id = 1) const float cTwo = 1;
+          layout (constant_id = 0) const int cOne = 1;
+          layout (constant_id = 1) const int cTwo = 1;
           layout (local_size_x = 1) in;
-          layout(set = 0, binding = 0) buffer a { float pa[]; };
-          layout(set = 0, binding = 1) buffer b { float pb[]; };
+          layout(set = 0, binding = 0) buffer a { int pa[]; };
+          layout(set = 0, binding = 1) buffer b { int pb[]; };
           void main() {
               uint index = gl_GlobalInvocationID.x;
               pa[index] = cOne;
@@ -77,18 +77,18 @@ TEST(TestSpecializationConstants, TestConstantsInt)
         {
             kp::Manager mgr;
 
-            std::shared_ptr<kp::TensorT<float>> tensorA =
-              mgr.tensor({ 0, 0, 0 });
-            std::shared_ptr<kp::TensorT<float>> tensorB =
-              mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<int32_t>> tensorA =
+              mgr.tensorT<int32_t>({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<int32_t>> tensorB =
+              mgr.tensorT<int32_t>({ 0, 0, 0 });
 
             std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
                                                                 tensorB };
 
-            kp::Constants spec = kp::Constants({ 5.0, 0.3 });
+            std::vector<int32_t> spec({ -1, -2 });
 
             std::shared_ptr<kp::Algorithm> algo =
-              mgr.algorithm(params, spirv, {}, spec);
+              mgr.algorithm(params, spirv, {}, spec, {});
 
             sq = mgr.sequence()
                    ->record<kp::OpTensorSyncDevice>(params)
@@ -96,8 +96,9 @@ TEST(TestSpecializationConstants, TestConstantsInt)
                    ->record<kp::OpTensorSyncLocal>(params)
                    ->eval();
 
-            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 5, 5, 5 }));
-            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 0.3, 0.3, 0.3 }));
+            EXPECT_EQ(tensorA->vector(), std::vector<int32_t>({ -1, -1, -1 }));
+            EXPECT_EQ(tensorB->vector(), std::vector<int32_t>({ -2, -2, -2 }));
         }
     }
 }
+

From 990ccd5f3b8ada4e7744e14ac1658b1f6f8124d5 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 16:06:49 +0100
Subject: [PATCH 17/19] Added docstrings for new functions

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 src/include/kompute/Algorithm.hpp | 18 +++++++++++++-----
 src/include/kompute/Manager.hpp   | 18 ++++++++++++++++--
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index 9dd70b1b5..cd9d913f0 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -24,12 +24,12 @@ class Algorithm
      *  @param spirv (optional) The spirv code to use to create the algorithm
      *  @param workgroup (optional) The kp::Workgroup to use for the dispatch
      * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
-     *  @param specializationConstants (optional) The kp::Constants to use to
+     *  @param specializationConstants (optional) The templatable param is to be used to
      * initialize the specialization constants which cannot be changed once set.
-     *  @param pushConstants (optional) The kp::Constants to use when
+     *  @param pushConstants (optional) This templatable param is to be used when
      * initializing the pipeline, which set the size of the push constants -
-     * these can be modified but all new values must have the same vector size
-     * as this initial value.
+     * these can be modified but all new values must have the same data type and length
+     * as otherwise it will result in errors.
      */
     template<typename S = float, typename P = float>
     Algorithm(std::shared_ptr<vk::Device> device,
@@ -176,7 +176,7 @@ class Algorithm
      * Sets the push constants to the new value provided to use in the next
      * bindPush()
      *
-     * @param The kp::Constant to use to set the push constants to use in the
+     * @param pushConstants The templatable vector is to be used to set the push constants to use in the
      * next bindPush(...) calls. The constants provided must be of the same size
      * as the ones created during initialization.
      */
@@ -189,6 +189,14 @@ class Algorithm
         this->setPushConstants(pushConstants.data(), size, memorySize);
     }
 
+    /**
+     * Sets the push constants to the new value provided to use in the next
+     * bindPush() with the raw memory block location and memory size to be used.
+     *
+     * @param data The raw data pointer to copy the data from, without modifying the pointer.
+     * @param size The number of data elements provided in the data
+     * @param memorySize The memory size of each of the data elements in bytes.
+     */
     void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
 
         uint32_t totalSize = memorySize * size;
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index 8a4244a2b..d9b850efd 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -123,6 +123,20 @@ class Manager
         return tensor;
     }
 
+    /**
+     * Default non-template function that can be used to create algorithm objects
+     * which provides default types to the push and spec constants as floats.
+     *
+     * @param tensors (optional) The tensors to initialise the algorithm with
+     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
+     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
+     * defaults to (tensor[0].size(), 1, 1)
+     * @param specializationConstants (optional) float vector to use for
+     * specialization constants, and defaults to an empty constant
+     * @param pushConstants (optional) float vector to use for push constants,
+     * and defaults to an empty constant
+     * @returns Shared pointer with initialised algorithm
+     */
     std::shared_ptr<Algorithm> algorithm(
       const std::vector<std::shared_ptr<Tensor>>& tensors = {},
       const std::vector<uint32_t>& spirv = {},
@@ -141,9 +155,9 @@ class Manager
      * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
      * @param workgroup (optional) kp::Workgroup for algorithm to use, and
      * defaults to (tensor[0].size(), 1, 1)
-     * @param specializationConstants (optional) kp::Constant to use for
+     * @param specializationConstants (optional) templatable vector parameter to use for
      * specialization constants, and defaults to an empty constant
-     * @param pushConstants (optional) kp::Constant to use for push constants,
+     * @param pushConstants (optional) templatable vector parameter to use for push constants,
      * and defaults to an empty constant
      * @returns Shared pointer with initialised algorithm
      */

From 932620091cb178477ab63db8fd0df2487ebcff38 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 16:51:43 +0100
Subject: [PATCH 18/19] Updated docs and renaming kp::Constants

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 README.md                                     |  6 +-
 docs/index.rst                                |  5 +-
 docs/overview/advanced-examples.rst           |  6 +-
 docs/overview/variable-types.rst              | 92 +++++++++++++++++++
 .../app/src/main/cpp/KomputeModelML.cpp       |  2 +-
 examples/logistic_regression/src/Main.cpp     |  2 +-
 python/src/docstrings.hpp                     | 12 +--
 python/src/main.cpp                           |  4 +-
 single_include/kompute/Kompute.hpp            | 44 ++++++---
 src/include/kompute/Algorithm.hpp             |  8 +-
 test/TestLogisticRegression.cpp               |  4 +-
 test/TestMultipleAlgoExecutions.cpp           | 10 +-
 test/TestPushConstant.cpp                     | 12 +--
 test/TestSpecializationConstant.cpp           |  2 +-
 14 files changed, 162 insertions(+), 47 deletions(-)
 create mode 100644 docs/overview/variable-types.rst

diff --git a/README.md b/README.md
index 71711973b..88ddbd36e 100644
--- a/README.md
+++ b/README.md
@@ -89,9 +89,9 @@ void kompute(const std::string& shader) {
 
     // 3. Create algorithm based on shader (supports buffers & push/spec constants)
     kp::Workgroup workgroup({3, 1, 1});
-    kp::Constants specConsts({ 2 });
-    kp::Constants pushConstsA({ 2.0 });
-    kp::Constants pushConstsB({ 3.0 });
+    std::vector<float> specConsts({ 2 });
+    std::vector<float> pushConstsA({ 2.0 });
+    std::vector<float> pushConstsB({ 3.0 });
 
     auto algorithm = mgr.algorithm(params,
                                    // See documentation shader section for compileSource
diff --git a/docs/index.rst b/docs/index.rst
index 9a62e7814..f3358a00f 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,9 +38,10 @@ Documentation Index (as per sidebar)
 
 .. toctree::
     :titlesonly:
-    :caption: Concepts & Deep Dives:
+    :caption: Advanced Concepts & Deep Dives:
 
-    CI, Docker Images Docs & Tests <overview/ci-tests.rst>
+    CI, Docker Images Docs & Tests <overview/ci-tests>
+    Variable Types for Tensors, and Push/Spec Constants <overview/variable-types>
     Asynchronous & Parallel Operations <overview/async-parallel>
     Mobile App Integration (Android) <overview/mobile-android>
     Game Engine Integration (Godot Engine) <overview/game-engine-godot>
diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst
index 68b901f76..ee44c5821 100644
--- a/docs/overview/advanced-examples.rst
+++ b/docs/overview/advanced-examples.rst
@@ -71,13 +71,13 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte
            sq = mgr.sequence()
                   ->record<kp::OpTensorSyncDevice>({ tensor })
                   ->record<kp::OpAlgoDispatch>(algo,
-                                               kp::Constants{ 0.1, 0.2, 0.3 })
+                                               std::vector<float>{ 0.1, 0.2, 0.3 })
                   ->record<kp::OpAlgoDispatch>(algo,
-                                               kp::Constants{ 0.3, 0.2, 0.1 })
+                                               std::vector<float>{ 0.3, 0.2, 0.1 })
                   ->record<kp::OpTensorSyncLocal>({ tensor })
                   ->eval();
 
-           EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
+           EXPECT_EQ(tensor->data(), std::vector<float>({ 0.4, 0.4, 0.4 }));
        }
    }
 
diff --git a/docs/overview/variable-types.rst b/docs/overview/variable-types.rst
new file mode 100644
index 000000000..a4c6481a7
--- /dev/null
+++ b/docs/overview/variable-types.rst
@@ -0,0 +1,92 @@
+
+Variable Types for Tensors and Constants
+========================================
+
+By default, the initial interfaces you interact with primarily use float values, which is enough to get through the basic conceptual examples. However, as real-world applications are developed, more specialized types may be required for kp::Tensor, as well as for SpecializationConstants and PushConstants.
+
+Before diving into the practical classes and interfaces that can be used to take advantage of the variable type support of Kompute, we want to provide some high-level intuition on what each of these components is.
+
+Variable Tensor Types
+---------------------
+
+For the kp::Tensor class, Kompute provides under the hood an interface to have more seamless interaction with multiple different underlying data types. This is done through the introduction of the class kp::TensorT<type> and its parent class kp::Tensor; however, as a developer you will be primarily interacting with the top-level kp::Tensor class, as this is what is provided through the high-level kp::Manager class.
+
+The kp::Tensor class does provide an "integrated" experience, which allows users to "seamlessly" retrieve the underlying data through the `data()` and `vector()` functions. This is done by leveraging C++ templates, as well as limiting the types that can be used, which are namely:
+
+* float
+* uint32
+* int32
+* double
+* bool
+
+Any other data type provided would result in an error, and for the time being Kompute will focus primarily on providing support for these types.
+
+The tests under `TestTensor.cpp` and `test_tensor_types.py` provide an overview of how users can take advantage of these features using std::vector for C++ and numpy array for Python.
+
+C++ Tensor Types Usage
+^^^^^^^
+
+Below you can see how it is possible to define different types in C++.
+
+.. literalinclude:: ../../test/TestTensor.cpp
+   :language: cpp
+   :lines: 21-
+
+Python Tensor Types Usage
+^^^^^^^^^^^^^^^^^
+
+.. literalinclude:: ../../python/test/test_tensor_types.py
+   :language: python
+   :lines: 26-46
+
+Variable Push Constants
+-----------------------
+
+Push constants are a relatively inexpensive way to provide dynamic data to a GPU Algorithm (shader) as further CPU compute is performed. Although push constants are a more efficient way to provide data, they are also limited, as there is a memory limit for push constants.
+
+Push constants with Kompute are flexible as it is possible to pass user-defined structs in C++. In Python it is limited to providing numpy arrays with multiple elements of the same type.
+
+C++ Push Consts Types Usage
+^^^^^^^
+
+As mentioned above, the test under `TestPushConstant.cpp` shows how it is possible to use user-defined structs for multiple elements of different types, which is not possible for specialization constants or tensors.
+
+These are defined in the `algorithm` function of the `kp::Manager`, and once a push constant is set, all other push constants provided have to consist of the same types and element size.
+
+More specifically, when passing a custom struct it is possible to pass a single element, or alternatively to pass multiple scalar values as part of the vector, and access them as outlined in the rest of the tests.
+
+.. literalinclude:: ../../test/TestPushConstant.cpp
+   :language: cpp
+   :lines: 182-231
+
+
+Python Push Consts Types Usage
+^^^^^^^^^^^^^^^^^
+
+In Python the push constants are limited to a single list of elements of the same type. These are provided by passing a numpy array to the `algorithm` function or the `kp::OpAlgoDispatch` operation.
+
+.. literalinclude:: ../../python/test/test_tensor_types.py
+   :language: python
+   :lines: 207-242
+
+Variable Specialization Constants
+---------------------------------
+
+Specialization constants are analogous to push constants, but these are not dynamic, can only be set on initialization or rebuild of `kp::Algorithm` and cannot be changed unless a `rebuild` is carried out.
+
+The usage of specialization constants is very similar to that of push constants, but with the following limitations:
+
+* These are defined using the constant_id in the glsl shader
+* Spec constants do not support complex types (i.e. user defined struct)
+* Kompute supports an array of elements of same type for specialization constants
+
+C++ Push Consts Types Usage
+^^^^
+
+The specialization constant example shows how they can be defined as a std::vector.
+
+.. literalinclude:: ../../test/TestSpecializationConstant.cpp
+   :language: cpp
+   :lines: 57-
+
+
diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
index 647cd5236..ef604909b 100755
--- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
+++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
@@ -47,7 +47,7 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
                         + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
         std::shared_ptr<kp::Algorithm> algo =
-                mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
+                mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
 
         mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp
index c7cc827ba..ec52439ed 100755
--- a/examples/logistic_regression/src/Main.cpp
+++ b/examples/logistic_regression/src/Main.cpp
@@ -41,7 +41,7 @@ int main()
                     + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
     std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
-            params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
+            params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
 
     mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp
index 8f2a7fe6b..fb1def637 100644
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@@ -36,9 +36,9 @@ tensors (optional) The tensors to use to create the descriptor
 resources @param spirv (optional) The spirv code to use to create the
 algorithm @param workgroup (optional) The kp::Workgroup to use for the
 dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if
-not set. @param specializationConstants (optional) The kp::Constants
+not set. @param specializationConstants (optional) The std::vector<float>
 to use to initialize the specialization constants which cannot be
-changed once set. @param pushConstants (optional) The kp::Constants to
+changed once set. @param pushConstants (optional) The std::vector<float> to
 use when initializing the pipeline, which set the size of the push
 constants - these can be modified but all new values must have the
 same vector size as this initial value.)doc";
@@ -54,12 +54,12 @@ static const char *__doc_kp_Algorithm_destroy = R"doc()doc";
 static const char *__doc_kp_Algorithm_getPush =
 R"doc(Gets the specialization constants of the current algorithm.
 
-@returns The kp::Constants currently set for push constants)doc";
+@returns The std::vector<float> currently set for push constants)doc";
 
 static const char *__doc_kp_Algorithm_getSpecializationConstants =
 R"doc(Gets the specialization constants of the current algorithm.
 
-@returns The kp::Constants currently set for specialization constants)doc";
+@returns The std::vector<float> currently set for specialization constants)doc";
 
 static const char *__doc_kp_Algorithm_getTensors =
 R"doc(Gets the current tensors that are used in the algorithm.
@@ -127,9 +127,9 @@ parameters to create the underlying resources.
 @param spirv The spirv code to use to create the algorithm @param
 workgroup (optional) The kp::Workgroup to use for the dispatch which
 defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. @param
-specializationConstants (optional) The kp::Constants to use to
+specializationConstants (optional) The std::vector<float> to use to
 initialize the specialization constants which cannot be changed once
-set. @param pushConstants (optional) The kp::Constants to use when
+set. @param pushConstants (optional) The std::vector<float> to use when
 initializing the pipeline, which set the size of the push constants -
 these can be modified but all new values must have the same vector
 size as this initial value.)doc";
diff --git a/python/src/main.cpp b/python/src/main.cpp
index 82a4bff5e..f2bb13090 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -74,9 +74,9 @@ PYBIND11_MODULE(kp, m) {
 
     py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(
             m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
-        .def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
+        .def(py::init<const std::shared_ptr<kp::Algorithm>&,const std::vector<float>&>(),
                 DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
-                py::arg("algorithm"), py::arg("push_consts") = kp::Constants())
+                py::arg("algorithm"), py::arg("push_consts") = std::vector<float>())
         .def(py::init(&opAlgoDispatchPyInit),
                 DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
                 py::arg("algorithm"), py::arg("push_consts"));
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 67efbe708..012eae394 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1087,12 +1087,12 @@ class Algorithm
      *  @param spirv (optional) The spirv code to use to create the algorithm
      *  @param workgroup (optional) The kp::Workgroup to use for the dispatch
      * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
-     *  @param specializationConstants (optional) The kp::Constants to use to
+     *  @param specializationConstants (optional) The templatable param is to be used to
      * initialize the specialization constants which cannot be changed once set.
-     *  @param pushConstants (optional) The kp::Constants to use when
+     *  @param pushConstants (optional) This templatable param is to be used when
      * initializing the pipeline, which set the size of the push constants -
-     * these can be modified but all new values must have the same vector size
-     * as this initial value.
+     * these can be modified, but all new values must have the same data type and
+     * length, as otherwise it will result in errors.
      */
     template<typename S = float, typename P = float>
     Algorithm(std::shared_ptr<vk::Device> device,
@@ -1127,9 +1127,9 @@ class Algorithm
      *  @param spirv The spirv code to use to create the algorithm
      *  @param workgroup (optional) The kp::Workgroup to use for the dispatch
      * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
-     *  @param specializationConstants (optional) The kp::Constants to use to
+     *  @param specializationConstants (optional) The std::vector<float> to use to
      * initialize the specialization constants which cannot be changed once set.
-     *  @param pushConstants (optional) The kp::Constants to use when
+     *  @param pushConstants (optional) The std::vector<float> to use when
      * initializing the pipeline, which set the size of the push constants -
      * these can be modified but all new values must have the same vector size
      * as this initial value.
@@ -1239,7 +1239,7 @@ class Algorithm
      * Sets the push constants to the new value provided to use in the next
      * bindPush()
      *
-     * @param The kp::Constant to use to set the push constants to use in the
+     * @param pushConstants The templated vector of push constants to use in the
      * next bindPush(...) calls. The constants provided must be of the same size
      * as the ones created during initialization.
      */
@@ -1252,6 +1252,14 @@ class Algorithm
         this->setPushConstants(pushConstants.data(), size, memorySize);
     }
 
+    /**
+     * Sets the push constants to the new value provided to use in the next
+     * bindPush() with the raw memory block location and memory size to be used.
+     *
+     * @param data The raw data pointer to copy the data from, without modifying the pointer.
+     * @param size The number of data elements provided in the data
+     * @param memorySize The memory size of each of the data elements in bytes.
+     */
     void setPushConstants(void* data, uint32_t size, uint32_t memorySize) {
 
         uint32_t totalSize = memorySize * size;
@@ -1285,7 +1293,7 @@ class Algorithm
     /**
      * Gets the specialization constants of the current algorithm.
      *
-     * @returns The kp::Constants currently set for specialization constants
+     * @returns The std::vector<T> currently set for specialization constants
      */
     template<typename T>
     const std::vector<T> getSpecializationConstants()
@@ -1296,7 +1304,7 @@ class Algorithm
     /**
      * Gets the specialization constants of the current algorithm.
      *
-     * @returns The kp::Constants currently set for push constants
+     * @returns The std::vector<T> currently set for push constants
      */
     template<typename T>
     const std::vector<T> getPushConstants()
@@ -2206,6 +2214,20 @@ class Manager
         return tensor;
     }
 
+    /**
+     * Default non-template function that can be used to create algorithm objects
+     * which provides default types to the push and spec constants as floats.
+     *
+     * @param tensors (optional) The tensors to initialise the algorithm with
+     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
+     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
+     * defaults to (tensor[0].size(), 1, 1)
+     * @param specializationConstants (optional) float vector to use for
+     * specialization constants, and defaults to an empty constant
+     * @param pushConstants (optional) float vector to use for push constants,
+     * and defaults to an empty constant
+     * @returns Shared pointer with initialised algorithm
+     */
     std::shared_ptr<Algorithm> algorithm(
       const std::vector<std::shared_ptr<Tensor>>& tensors = {},
       const std::vector<uint32_t>& spirv = {},
@@ -2224,9 +2246,9 @@ class Manager
      * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
      * @param workgroup (optional) kp::Workgroup for algorithm to use, and
      * defaults to (tensor[0].size(), 1, 1)
-     * @param specializationConstants (optional) kp::Constant to use for
+     * @param specializationConstants (optional) templatable vector parameter to use for
      * specialization constants, and defaults to an empty constant
-     * @param pushConstants (optional) kp::Constant to use for push constants,
+     * @param pushConstants (optional) templatable vector parameter to use for push constants,
      * and defaults to an empty constant
      * @returns Shared pointer with initialised algorithm
      */
diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index cd9d913f0..be17a2d09 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -64,9 +64,9 @@ class Algorithm
      *  @param spirv The spirv code to use to create the algorithm
      *  @param workgroup (optional) The kp::Workgroup to use for the dispatch
      * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
-     *  @param specializationConstants (optional) The kp::Constants to use to
+     *  @param specializationConstants (optional) The std::vector<float> to use to
      * initialize the specialization constants which cannot be changed once set.
-     *  @param pushConstants (optional) The kp::Constants to use when
+     *  @param pushConstants (optional) The std::vector<float> to use when
      * initializing the pipeline, which set the size of the push constants -
      * these can be modified but all new values must have the same vector size
      * as this initial value.
@@ -230,7 +230,7 @@ class Algorithm
     /**
      * Gets the specialization constants of the current algorithm.
      *
-     * @returns The kp::Constants currently set for specialization constants
+     * @returns The std::vector<T> currently set for specialization constants
      */
     template<typename T>
     const std::vector<T> getSpecializationConstants()
@@ -241,7 +241,7 @@ class Algorithm
     /**
      * Gets the specialization constants of the current algorithm.
      *
-     * @returns The kp::Constants currently set for push constants
+     * @returns The std::vector<T> currently set for push constants
      */
     template<typename T>
     const std::vector<T> getPushConstants()
diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp
index c1818ec27..9b736213f 100644
--- a/test/TestLogisticRegression.cpp
+++ b/test/TestLogisticRegression.cpp
@@ -48,7 +48,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
                         test_shaders_glsl_test_logistic_regression_comp_spv_len));
 
         std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
-          params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
+          params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
 
         std::shared_ptr<kp::Sequence> sq =
           mgr.sequence()
@@ -127,7 +127,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
                         shaders_glsl_logisticregression_comp_spv_len));
 
         std::shared_ptr<kp::Algorithm> algorithm =
-          mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 }));
+          mgr.algorithm(params, spirv, kp::Workgroup(), std::vector<float>({ 5.0 }));
 
         std::shared_ptr<kp::Sequence> sq =
           mgr.sequence()
diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index 1fe6a6664..40d190c62 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -49,9 +49,9 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
     };
 
     kp::Workgroup workgroup({ 3, 1, 1 });
-    kp::Constants specConsts({ 2 });
-    kp::Constants pushConstsA({ 2.0 });
-    kp::Constants pushConstsB({ 3.0 });
+    std::vector<float> specConsts({ 2 });
+    std::vector<float> pushConstsA({ 2.0 });
+    std::vector<float> pushConstsB({ 3.0 });
 
     auto algorithm = mgr.algorithm(params,
                                    compileSource(shader),
@@ -263,8 +263,8 @@ TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions)
     };
 
     kp::Workgroup workgroup({ 3, 1, 1 });
-    kp::Constants specConsts({ 2 });
-    kp::Constants pushConsts({ 2.0 });
+    std::vector<float> specConsts({ 2 });
+    std::vector<float> pushConsts({ 2.0 });
 
     auto algorithm = mgr.algorithm(params,
                                    compileSource(shader),
diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index c885f1d87..4742cd187 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -44,11 +44,11 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
             // We need to run this in sequence to avoid race condition
             // We can't use atomicAdd as swiftshader doesn't support it for
             // float
-            sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 });
-            sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.1, 0.2, 0.3 });
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.3, 0.2, 0.1 });
             sq->eval<kp::OpTensorSyncLocal>({ tensor });
 
-            EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
+            EXPECT_EQ(tensor->vector(), std::vector<float>({ 0.4, 0.4, 0.4 }));
         }
     }
 }
@@ -90,10 +90,10 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
             // We can't use atomicAdd as swiftshader doesn't support it for
             // float
             sq->eval<kp::OpAlgoDispatch>(algo);
-            sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
+            sq->eval<kp::OpAlgoDispatch>(algo, std::vector<float>{ 0.3, 0.2, 0.1 });
             sq->eval<kp::OpTensorSyncLocal>({ tensor });
 
-            EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
+            EXPECT_EQ(tensor->vector(), std::vector<float>({ 0.4, 0.4, 0.4 }));
         }
     }
 }
@@ -132,7 +132,7 @@ TEST(TestPushConstants, TestConstantsWrongSize)
             sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
 
             EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
-                           algo, kp::Constants{ 0.1, 0.2, 0.3 }),
+                           algo, std::vector<float>{ 0.1, 0.2, 0.3 }),
                          std::runtime_error);
         }
     }
diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index a1dd5587d..abc507e7e 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -37,7 +37,7 @@ TEST(TestSpecializationConstants, TestTwoConstants)
             std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
                                                                 tensorB };
 
-            kp::Constants spec = kp::Constants({ 5.0, 0.3 });
+            std::vector<float> spec = std::vector<float>({ 5.0, 0.3 });
 
             std::shared_ptr<kp::Algorithm> algo =
               mgr.algorithm(params, spirv, {}, spec);

From c758ec0c202d9856be2863e12f00f9a3598ccd91 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 12 Sep 2021 16:56:44 +0100
Subject: [PATCH 19/19] Updated algo python

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
---
 python/src/main.cpp         | 2 +-
 python/test/test_kompute.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index f2bb13090..d0447fe8e 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -220,7 +220,7 @@ PYBIND11_MODULE(kp, m) {
             py::arg("workgroup") = kp::Workgroup(),
             py::arg("spec_consts") = std::vector<float>(),
             py::arg("push_consts") = std::vector<float>())
-        .def("algorithm_t", [np](kp::Manager& self,
+        .def("algorithm", [np](kp::Manager& self,
                              const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
                              const py::bytes& spirv,
                              const kp::Workgroup& workgroup,
diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py
index 385933f26..8660b0151 100644
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@@ -229,7 +229,7 @@ def test_pushconsts_int():
     spec_consts = np.array([], dtype=np.int32)
     push_consts = np.array([-1, -1, -1], dtype=np.int32)
 
-    algo = mgr.algorithm_t([tensor], spirv, (1, 1, 1), spec_consts, push_consts)
+    algo = mgr.algorithm([tensor], spirv, (1, 1, 1), spec_consts, push_consts)
 
     (mgr.sequence()
         .record(kp.OpTensorSyncDevice([tensor]))