From fa5dc43b443f41c6438766036fc8b58aa3bfadcc Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 12:02:08 +0000
Subject: [PATCH 01/16] Updated compile_source(s) to compileSource(s)

---
 examples/array_multiplication/src/Main.cpp           |  2 +-
 .../kompute_summator/KomputeSummatorNode.cpp         |  2 +-
 .../gdnative_shared/src/KomputeSummator.cpp          |  2 +-
 single_include/kompute/Kompute.hpp                   |  4 ++--
 src/Shader.cpp                                       |  6 +++---
 src/include/kompute/Shader.hpp                       |  4 ++--
 test/TestAsyncOperations.cpp                         |  4 ++--
 test/TestDestroy.cpp                                 |  6 +++---
 test/TestMultipleAlgoExecutions.cpp                  | 12 ++++++------
 test/TestOpShadersFromStringAndFile.cpp              |  2 +-
 test/TestPushConstant.cpp                            |  6 +++---
 test/TestSequence.cpp                                |  2 +-
 test/TestShaderResources.cpp                         |  2 +-
 test/TestSpecializationConstant.cpp                  |  2 +-
 14 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp
index fd823bca8..acb76898c 100755
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@@ -39,7 +39,7 @@ int main()
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
 
-    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compile_source(shader));
+    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compileSource(shader));
 
     mgr.sequence()
         ->record<kp::OpTensorSyncDevice>(params)
diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
index f50c56d5c..e901ef816 100644
--- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
+++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
@@ -54,7 +54,7 @@ void KomputeSummatorNode::_init() {
         std::shared_ptr<kp::Algorithm> algo =
           mgr.algorithm(
                 { this->mPrimaryTensor, this->mSecondaryTensor },
-                kp::Shader::compile_source(shader));
+                kp::Shader::compileSource(shader));
 
 
         // First we ensure secondary tensor loads to GPU
diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp
index ece095c8e..99aabb338 100644
--- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp
+++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp
@@ -58,7 +58,7 @@ void KomputeSummator::_init() {
         // Then we run the operation with both tensors
         this->mSequence->record<kp::OpAlgoCreate>(
             { this->mPrimaryTensor, this->mSecondaryTensor }, 
-            kp::Shader::compile_source(shader));
+            kp::Shader::compileSource(shader));
 
         // We map the result back to local 
         this->mSequence->record<kp::OpTensorSyncLocal>(
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 38213bb6e..593390dbe 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -762,7 +762,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_sources(
+    static std::vector<uint32_t> compileSources(
       const std::vector<std::string>& sources,
       const std::vector<std::string>& files = {},
       const std::string& entryPoint = "main",
@@ -783,7 +783,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_source(
+    static std::vector<uint32_t> compileSource(
       const std::string& source,
       const std::string& entryPoint = "main",
       std::vector<std::pair<std::string, std::string>> definitions = {},
diff --git a/src/Shader.cpp b/src/Shader.cpp
index 968e53234..bedac0165 100644
--- a/src/Shader.cpp
+++ b/src/Shader.cpp
@@ -5,7 +5,7 @@
 namespace kp {
 
 std::vector<uint32_t>
-Shader::compile_sources(
+Shader::compileSources(
   const std::vector<std::string>& sources,
   const std::vector<std::string>& files,
   const std::string& entryPoint,
@@ -92,13 +92,13 @@ Shader::compile_sources(
 }
 
 std::vector<uint32_t>
-Shader::compile_source(
+Shader::compileSource(
   const std::string& source,
   const std::string& entryPoint,
   std::vector<std::pair<std::string, std::string>> definitions,
   const TBuiltInResource& resource)
 {
-    return compile_sources({ source },
+    return compileSources({ source },
                            std::vector<std::string>({}),
                            entryPoint,
                            definitions,
diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp
index 9fd1709be..9ecab24cd 100644
--- a/src/include/kompute/Shader.hpp
+++ b/src/include/kompute/Shader.hpp
@@ -39,7 +39,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_sources(
+    static std::vector<uint32_t> compileSources(
       const std::vector<std::string>& sources,
       const std::vector<std::string>& files = {},
       const std::string& entryPoint = "main",
@@ -60,7 +60,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_source(
+    static std::vector<uint32_t> compileSource(
       const std::string& source,
       const std::string& entryPoint = "main",
       std::vector<std::pair<std::string, std::string>> definitions = {},
diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp
index b1919ce52..2f8c7d819 100644
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@@ -37,7 +37,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
         }
     )");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::vector<float> data(size, 0.0);
     std::vector<float> resultSync(size, 100000000);
@@ -145,7 +145,7 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
         }
     )");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::vector<float> data(size, 0.0);
     std::vector<float> resultAsync(size, 100000000);
diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
index fee3854c4..0b948d64f 100644
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@@ -16,7 +16,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     {
         std::shared_ptr<kp::Sequence> sq = nullptr;
@@ -57,7 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
           pa[index] = pa[index] + 1;
           pb[index] = pb[index] + 2;
       })");
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     {
         std::shared_ptr<kp::Sequence> sq = nullptr;
@@ -101,7 +101,7 @@ TEST(TestDestroy, TestDestroySequenceSingle)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     {
         std::shared_ptr<kp::Sequence> sq = nullptr;
diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index b94591308..63dd5f7fe 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -49,7 +49,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
     kp::Constants pushConstsB({ 3.0 });
 
     auto algorithm = mgr.algorithm(
-      params, kp::Shader::compile_source(shader), workgroup, specConsts, pushConstsA);
+      params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA);
 
     // 3. Run operation with string shader synchronously
     mgr.sequence()
@@ -84,7 +84,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     {
         mgr.sequence()
@@ -114,7 +114,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::shared_ptr<kp::Algorithm> algorithm =
       mgr.algorithm({ tensorA }, spirv);
@@ -150,7 +150,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::shared_ptr<kp::Algorithm> algorithm =
       mgr.algorithm({ tensorA }, spirv);
@@ -185,7 +185,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::shared_ptr<kp::Algorithm> algorithm =
       mgr.algorithm({ tensorA }, spirv);
@@ -221,7 +221,7 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
                   pa[index] = pa[index] + 1;
               })");
 
-            std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+            std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
             std::shared_ptr<kp::Algorithm> algorithm =
               mgr.algorithm({ tensorA }, spirv);
diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp
index 3e6856a21..e766c8efb 100644
--- a/test/TestOpShadersFromStringAndFile.cpp
+++ b/test/TestOpShadersFromStringAndFile.cpp
@@ -27,7 +27,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
         }
     )");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
 
diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index f51f8cc42..b37fe4d72 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -22,7 +22,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
               pa[2] += pcs.z;
           })");
 
-        std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+        std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
         std::shared_ptr<kp::Sequence> sq = nullptr;
 
@@ -65,7 +65,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
               pa[2] += pcs.z;
           })");
 
-        std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+        std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
         std::shared_ptr<kp::Sequence> sq = nullptr;
 
@@ -108,7 +108,7 @@ TEST(TestPushConstants, TestConstantsWrongSize)
               pa[2] += pcs.z;
           })");
 
-        std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+        std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
         std::shared_ptr<kp::Sequence> sq = nullptr;
 
diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp
index b8afd1ad6..7d70a477b 100644
--- a/test/TestSequence.cpp
+++ b/test/TestSequence.cpp
@@ -66,7 +66,7 @@ TEST(TestSequence, RerecordSequence)
 
     sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(R"(
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(R"(
         #version 450
 
         layout (local_size_x = 1) in;
diff --git a/test/TestShaderResources.cpp b/test/TestShaderResources.cpp
index b0013ef80..536f4ca0c 100644
--- a/test/TestShaderResources.cpp
+++ b/test/TestShaderResources.cpp
@@ -25,7 +25,7 @@ static const std::string shaderString = (R"(
 )");
 
 void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) {
-    kp::Shader::compile_source(shaderString,  std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
+    kp::Shader::compileSource(shaderString,  std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
 }
 
 
diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index e66f9d52e..2c6e284d2 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -18,7 +18,7 @@ TEST(TestSpecializationConstants, TestTwoConstants)
               pb[index] = cTwo;
           })");
 
-        std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+        std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
 
         std::shared_ptr<kp::Sequence> sq = nullptr;
 

From f569bae998a8805f23f8ca7d53ec2c4c9b142dce Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 12:02:26 +0000
Subject: [PATCH 02/16] Updated python docstrings

---
 python/src/docstrings.hpp | 145 ++++++++++++++++++++++++--------------
 python/src/main.cpp       | 137 +++++++++++++++++++++--------------
 2 files changed, 178 insertions(+), 104 deletions(-)

diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp
index bf98e6581..a5bda0a4d 100644
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@@ -247,8 +247,10 @@ static const char *__doc_kp_Manager_sequence =
 R"doc(Create a managed sequence that will be destroyed by this manager if it
 hasn't been destroyed by its reference count going to zero.
 
-@param queueIndex The queue to use from the available queues @returns
-Shared pointer with initialised sequence)doc";
+@param queueIndex The queue to use from the available queues @param
+nrOfTimestamps The maximum number of timestamps to allocate. If zero
+(default), disables latching of timestamps. @returns Shared pointer
+with initialised sequence)doc";
 
 static const char *__doc_kp_Manager_tensor =
 R"doc(Create a managed tensor that will be destroyed by this manager if it
@@ -264,18 +266,26 @@ of algorithm and parameter components which can be used with shaders.
 By default it enables the user to provide a dynamic number of tensors
 which are then passed as inputs.)doc";
 
-static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc";
+static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch =
+R"doc(Constructor that stores the algorithm to use as well as the relevant
+push constants to override when recording.
+
+@param algorithm The algorithm object to use for dispatch @param
+pushConstants The push constants to use for override)doc";
 
 static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc";
 
 static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc";
 
 static const char *__doc_kp_OpAlgoDispatch_postEval =
-R"doc(Executes after the recorded commands are submitted, and performs a
-copy of the GPU Device memory into the staging buffer so the output
-data can be retrieved.)doc";
+R"doc(Does not perform any postEval commands.
 
-static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc";
+@param commandBuffer The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpAlgoDispatch_preEval =
+R"doc(Does not perform any preEval commands.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpAlgoDispatch_record =
 R"doc(This records the commands that are to be sent to the GPU. This
@@ -283,7 +293,9 @@ includes the barriers that ensure the memory has been copied before
 going in and out of the shader, as well as the dispatch operation that
 sends the shader processing to the gpu. This function also records the
 GPU memory copy of the output data for the staging buffer so it can be
-read by the host.)doc";
+read by the host.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpBase =
 R"doc(Base Operation which provides the high level interface that Kompute
@@ -299,7 +311,9 @@ the commands to the GPU for processing, and can be used to perform any
 tear-down steps required as the computation iteration finishes. It's
 worth noting that there are situations where eval can be called
 multiple times, so the resources that are destroyed should not require
-a re-init unless explicitly provided by the user.)doc";
+a re-init unless explicitly provided by the user.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpBase_preEval =
 R"doc(Pre eval is called before the Sequence has called eval and submitted
@@ -307,12 +321,16 @@ the commands to the GPU for processing, and can be used to perform any
 per-eval setup steps required as the computation iteration begins.
 It's worth noting that there are situations where eval can be called
 multiple times, so the resources that are created should be idempotent
-in case it's called multiple times in a row.)doc";
+in case it's called multiple times in a row.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpBase_record =
 R"doc(The record function is intended to only send a record command or run
 commands that are expected to record operations that are to be
-submitted as a batch into the GPU.)doc";
+submitted as a batch into the GPU.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpMult =
 R"doc(Operation that performs multiplication on two tensors and outpus on
@@ -323,12 +341,9 @@ R"doc(Default constructor with parameters that provides the bare minimum
 requirements for the operations to be able to create and manage their
 sub-components.
 
-@param physicalDevice Vulkan physical device used to find device
-queues @param device Vulkan logical device for passing to Algorithm
-@param commandBuffer Vulkan Command Buffer to record commands into
 @param tensors Tensors that are to be used in this operation @param
-komputeWorkgroup Optional parameter to specify the layout for
-processing)doc";
+algorithm An algorithm that will be overridden with the OpMult shader
+data and the tensors provided which are expected to be 3)doc";
 
 static const char *__doc_kp_OpTensorCopy =
 R"doc(Operation that copies the data from the first tensor to the rest of
@@ -340,84 +355,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy =
 R"doc(Default constructor with parameters that provides the core vulkan
 resources and the tensors that will be used in the operation.
 
-@param physicalDevice Vulkan physical device used to find device
-queues @param device Vulkan logical device for passing to Algorithm
-@param commandBuffer Vulkan Command Buffer to record commands into
 @param tensors Tensors that will be used to create in operation.)doc";
 
 static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc";
 
 static const char *__doc_kp_OpTensorCopy_postEval =
 R"doc(Copies the local vectors for all the tensors to sync the data with the
-gpu.)doc";
+gpu.
 
-static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc";
+@param commandBuffer The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpTensorCopy_preEval =
+R"doc(Does not perform any preEval commands.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpTensorCopy_record =
 R"doc(Records the copy commands from the first tensor into all the other
-tensors provided. Also optionally records a barrier.)doc";
+tensors provided. Also optionally records a barrier.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice =
 R"doc(Operation that syncs tensor's device by mapping local data into the
 device memory. For TensorTypes::eDevice it will use a record operation
 for the memory to be syncd into GPU memory which means that the
 operation will be done in sync with GPU commands. For
-TensorTypes::eStaging it will only map the data into host memory which
+TensorTypes::eHost it will only map the data into host memory which
 will happen during preEval before the recorded commands are
-dispatched. This operation won't have any effect on
-TensorTypes::eStaging.)doc";
+dispatched.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice =
 R"doc(Default constructor with parameters that provides the core vulkan
 resources and the tensors that will be used in the operation. The
 tensos provided cannot be of type TensorTypes::eStorage.
 
-@param physicalDevice Vulkan physical device used to find device
-queues @param device Vulkan logical device for passing to Algorithm
-@param commandBuffer Vulkan Command Buffer to record commands into
 @param tensors Tensors that will be used to create in operation.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc";
 
-static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc";
+static const char *__doc_kp_OpTensorSyncDevice_postEval =
+R"doc(Does not perform any postEval commands.
 
-static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc";
+@param commandBuffer The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpTensorSyncDevice_preEval =
+R"doc(Does not perform any preEval commands.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpTensorSyncDevice_record =
 R"doc(For device tensors, it records the copy command for the tensor to copy
-the data from its staging to device memory.)doc";
+the data from its staging to device memory.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal =
 R"doc(Operation that syncs tensor's local memory by mapping device data into
 the local CPU memory. For TensorTypes::eDevice it will use a record
 operation for the memory to be syncd into GPU memory which means that
 the operation will be done in sync with GPU commands. For
-TensorTypes::eStaging it will only map the data into host memory which
+TensorTypes::eHost it will only map the data into host memory which
 will happen during preEval before the recorded commands are
-dispatched. This operation won't have any effect on
-TensorTypes::eStaging.)doc";
+dispatched.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal =
 R"doc(Default constructor with parameters that provides the core vulkan
 resources and the tensors that will be used in the operation. The
 tensors provided cannot be of type TensorTypes::eStorage.
 
-@param physicalDevice Vulkan physical device used to find device
-queues @param device Vulkan logical device for passing to Algorithm
-@param commandBuffer Vulkan Command Buffer to record commands into
 @param tensors Tensors that will be used to create in operation.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_postEval =
 R"doc(For host tensors it performs the map command from the host memory into
-local memory.)doc";
+local memory.
 
-static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc";
+@param commandBuffer The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpTensorSyncLocal_preEval =
+R"doc(Does not perform any preEval commands.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_OpTensorSyncLocal_record =
 R"doc(For device tensors, it records the copy command for the tensor to copy
-the data from its device to staging memory.)doc";
+the data from its device to staging memory.
+
+@param commandBuffer The command buffer to record the command into.)doc";
 
 static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc";
 
@@ -427,7 +453,8 @@ generate all dependent resources.
 
 @param physicalDevice Vulkan physical device @param device Vulkan
 logical device @param computeQueue Vulkan compute queue @param
-queueIndex Vulkan compute queue index in device)doc";
+queueIndex Vulkan compute queue index in device @param totalTimestamps
+Maximum number of timestamps to allocate)doc";
 
 static const char *__doc_kp_Sequence_begin =
 R"doc(Begins recording commands for commands to be submitted into the
@@ -443,6 +470,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc";
 
 static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc";
 
+static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc";
+
 static const char *__doc_kp_Sequence_destroy =
 R"doc(Destroys and frees the GPU resources which include the buffer and
 memory and sets the sequence as init=False.)doc";
@@ -528,6 +557,10 @@ finishes, it runs the postEval of all operations.
 @param waitFor Number of milliseconds to wait before timing out.
 @return shared_ptr<Sequence> of the Sequence class itself)doc";
 
+static const char *__doc_kp_Sequence_getTimestamps =
+R"doc(Return the timestamps that were latched at the beginning and after
+each operation during the last eval() call.)doc";
+
 static const char *__doc_kp_Sequence_isInit =
 R"doc(Returns true if the sequence has been initialised, and it's based on
 the GPU resources being refrenced.
@@ -607,9 +640,11 @@ R"doc(Clears command buffer and triggers re-record of all the current
 operations saved, which is useful if the underlying kp::Tensors or
 kp::Algorithms are modified and need to be re-recorded.)doc";
 
+static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc";
+
 static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc";
 
-static const char *__doc_kp_Shader_compile_source =
+static const char *__doc_kp_Shader_compileSource =
 R"doc(Compile a single glslang source from string value. Currently this
 function uses the glslang C++ interface which is not thread safe so
 this funciton should not be called from multiple threads concurrently.
@@ -622,7 +657,7 @@ List of pairs containing key value definitions @param resourcesLimit A
 list that contains the resource limits for the GLSL compiler @return
 The compiled SPIR-V binary in unsigned int32 format)doc";
 
-static const char *__doc_kp_Shader_compile_sources =
+static const char *__doc_kp_Shader_compileSources =
 R"doc(Compile multiple sources with optional filenames. Currently this
 function uses the glslang C++ interface which is not thread safe so
 this funciton should not be called from multiple threads concurrently.
@@ -645,11 +680,13 @@ buffer, which would be used to store their respective data. The
 tensors can be used for GPU data storage or transfer.)doc";
 
 static const char *__doc_kp_Tensor_Tensor =
-R"doc(Default constructor with data provided which would be used to create
-the respective vulkan buffer and memory.
+R"doc(Constructor with data provided which would be used to create the
+respective vulkan buffer and memory.
 
+@param physicalDevice The physical device to use to fetch properties
+@param device The device to use to create the buffer and memory from
 @param data Non-zero-sized vector of data that will be used by the
-tensor @param tensorType Type for the tensor which is of type
+tensor @param tensorTypes Type for the tensor which is of type
 TensorTypes)doc";
 
 static const char *__doc_kp_Tensor_TensorTypes =
@@ -697,7 +734,11 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";
 
 static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";
 
-static const char *__doc_kp_Tensor_isInit = R"doc()doc";
+static const char *__doc_kp_Tensor_isInit =
+R"doc(Check whether tensor is initialized based on the created gpu
+resources.
+
+@returns Boolean stating whether tensor is initialized)doc";
 
 static const char *__doc_kp_Tensor_mData = R"doc()doc";
 
@@ -742,11 +783,11 @@ vector's.
 Returns the element in the position requested.)doc";
 
 static const char *__doc_kp_Tensor_rebuild =
-R"doc(Initialiser which calls the initialisation for all the respective
-tensors as well as creates the respective staging tensors. The staging
-tensors would only be created for the tensors of type
-TensorType::eDevice as otherwise there is no need to copy from host
-memory.)doc";
+R"doc(Function to trigger reinitialisation of the tensor buffer and memory
+with new data as well as new potential device type.
+
+@param data Vector of data to use to initialise vector from @param
+tensorType The type to use for the tensor)doc";
 
 static const char *__doc_kp_Tensor_recordBufferMemoryBarrier =
 R"doc(Records the buffer memory barrier into the command buffer which
diff --git a/python/src/main.cpp b/python/src/main.cpp
index 7165d41e7..d4b0f2084 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) {
     py::module_ np = py::module_::import("numpy");
 
     py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes")
-        .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
-        .value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.")
-        .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
+        .value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice))
+        .value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost))
+        .value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage))
         .export_values();
 
 #if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS
@@ -37,51 +37,63 @@ PYBIND11_MODULE(kp, m) {
                                     const std::string& source,
                                     const std::string& entryPoint,
                                     const std::vector<std::pair<std::string,std::string>>& definitions) {
-                std::vector<uint32_t> spirv = kp::Shader::compile_source(source, entryPoint, definitions);
+                std::vector<uint32_t> spirv = kp::Shader::compileSource(source, entryPoint, definitions);
                 return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
             },
-            "Compiles string source provided and returns the value in bytes",
-            py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
+            DOC(kp, Shader, compileSource),
+            py::arg("source"),
+            py::arg("entryPoint") = "main",
+            py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() )
         .def_static("compile_sources", [](
                                     const std::vector<std::string>& source,
                                     const std::vector<std::string>& files,
                                     const std::string& entryPoint,
                                     const std::vector<std::pair<std::string,std::string>>& definitions) {
-                std::vector<uint32_t> spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions);
+                std::vector<uint32_t> spirv = kp::Shader::compileSources(source, files, entryPoint, definitions);
                 return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t));
             },
-            "Compiles sources provided with file names and returns the value in bytes",
-            py::arg("sources"), py::arg("files") = std::vector<std::string>(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
+            DOC(kp, Shader, compileSources),
+            py::arg("sources"),
+            py::arg("files") = std::vector<std::string>(),
+            py::arg("entryPoint") = "main",
+            py::arg("definitions") = std::vector<std::pair<std::string,std::string>>() );
 #endif // KOMPUTE_DISABLE_SHADER_UTILS
 
-    py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase");
+    py::class_<kp::OpBase, std::shared_ptr<kp::OpBase>>(m, "OpBase", DOC(kp, OpBase));
 
-    py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(m, "OpTensorSyncDevice", py::base<kp::OpBase>())
-        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
+    py::class_<kp::OpTensorSyncDevice, std::shared_ptr<kp::OpTensorSyncDevice>>(
+            m, "OpTensorSyncDevice", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncDevice))
+        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice));
 
-    py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(m, "OpTensorSyncLocal", py::base<kp::OpBase>())
-        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
+    py::class_<kp::OpTensorSyncLocal, std::shared_ptr<kp::OpTensorSyncLocal>>(
+            m, "OpTensorSyncLocal", py::base<kp::OpBase>(), DOC(kp, OpTensorSyncLocal))
+        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal));
 
-    py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(m, "OpTensorCopy", py::base<kp::OpBase>())
-        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>());
+    py::class_<kp::OpTensorCopy, std::shared_ptr<kp::OpTensorCopy>>(
+            m, "OpTensorCopy", py::base<kp::OpBase>(), DOC(kp, OpTensorCopy))
+        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&>(), DOC(kp, OpTensorCopy, OpTensorCopy));
 
-    py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(m, "OpAlgoDispatch", py::base<kp::OpBase>())
+    py::class_<kp::OpAlgoDispatch, std::shared_ptr<kp::OpAlgoDispatch>>(
+            m, "OpAlgoDispatch", py::base<kp::OpBase>(), DOC(kp, OpAlgoDispatch))
         .def(py::init<const std::shared_ptr<kp::Algorithm>&,const kp::Constants&>(),
+                DOC(kp, OpAlgoDispatch, OpAlgoDispatch),
                 py::arg("algorithm"), py::arg("push_consts") = kp::Constants());
 
-    py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(m, "OpMult", py::base<kp::OpBase>())
-        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>());
+    py::class_<kp::OpMult, std::shared_ptr<kp::OpMult>>(
+            m, "OpMult", py::base<kp::OpBase>(), DOC(kp, OpMult))
+        .def(py::init<const std::vector<std::shared_ptr<kp::Tensor>>&,const std::shared_ptr<kp::Algorithm>&>(),
+                DOC(kp, OpMult, OpMult));
 
-    py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm")
-        .def("get_tensors", &kp::Algorithm::getTensors)
-        .def("destroy", &kp::Algorithm::destroy)
-        .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants)
-        .def("is_init", &kp::Algorithm::isInit);
+    py::class_<kp::Algorithm, std::shared_ptr<kp::Algorithm>>(m, "Algorithm", DOC(kp, Algorithm, Algorithm))
+        .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors))
+        .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy))
+        .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants))
+        .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit));
 
     py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
         .def("data", [](kp::Tensor& self) {
                 return py::array(self.data().size(), self.data().data());
-            }, "Returns stored data as a new numpy array.")
+            }, DOC(kp, Tensor, data))
         .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
                 "When only an index is necessary")
         .def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
@@ -91,7 +103,7 @@ PYBIND11_MODULE(kp, m) {
                 const py::buffer_info info        = flatdata.request();
                 const float* ptr                  = (float*) info.ptr;
                 self.setData(std::vector<float>(ptr, ptr+flatdata.size()));
-            }, "Overrides the data in the local Tensor memory.")
+            }, DOC(kp, Tensor, setData))
         .def("__iter__", [](kp::Tensor &self) {
                 return py::make_iterator(self.data().begin(), self.data().end());
             }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
@@ -112,35 +124,52 @@ PYBIND11_MODULE(kp, m) {
                 }
                 return reversed;
             })
-        .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
-        .def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.")
-        .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.")
-        .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.")
-        .def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources.");
+        .def("size", &kp::Tensor::size, DOC(kp, Tensor, size))
+        .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size))
+        .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType))
+        .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit))
+        .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy));
 
-    py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
-        .def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); })
-        .def("eval", [](kp::Sequence& self) { return self.eval(); })
-        .def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); })
-        .def("eval_async", [](kp::Sequence& self) { return self.eval(); })
-        .def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); })
-        .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); })
-        .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); })
-        .def("is_recording", &kp::Sequence::isRecording)
-        .def("is_running", &kp::Sequence::isRunning)
-        .def("is_init", &kp::Sequence::isInit)
-        .def("get_timestamps", &kp::Sequence::getTimestamps)
-        .def("clear", &kp::Sequence::clear)
-        .def("destroy", &kp::Sequence::destroy);
+    py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence", DOC(kp, Sequence))
+        .def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); },
+                DOC(kp, Sequence, record))
+        .def("eval", [](kp::Sequence& self) { return self.eval(); },
+                DOC(kp, Sequence, eval))
+        .def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); },
+                DOC(kp, Sequence, eval))
+        // The no-argument overload forwards to evalAsync(), like the op-taking overload below, not to eval().
+        .def("eval_async", [](kp::Sequence& self) { return self.evalAsync(); },
+                DOC(kp, Sequence, evalAsync))
+        .def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); },
+                DOC(kp, Sequence, evalAsync))
+        .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); },
+                DOC(kp, Sequence, evalAwait))
+        .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); },
+                DOC(kp, Sequence, evalAwait))
+        .def("is_recording", &kp::Sequence::isRecording,
+                DOC(kp, Sequence, isRecording))
+        .def("is_running", &kp::Sequence::isRunning,
+                DOC(kp, Sequence, isRunning))
+        .def("is_init", &kp::Sequence::isInit,
+                DOC(kp, Sequence, isInit))
+        .def("clear", &kp::Sequence::clear,
+                DOC(kp, Sequence, clear))
+        .def("rerecord", &kp::Sequence::rerecord,
+                DOC(kp, Sequence, rerecord))
+        .def("get_timestamps", &kp::Sequence::getTimestamps, DOC(kp, Sequence, getTimestamps))
+        .def("destroy", &kp::Sequence::destroy,
+                DOC(kp, Sequence, destroy));
 
-    py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
-        .def(py::init())
-        .def(py::init<uint32_t>())
+    py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager", DOC(kp, Manager))
+        .def(py::init(), DOC(kp, Manager, Manager))
+        .def(py::init<uint32_t>(), DOC(kp, Manager, Manager_2))
         .def(py::init<uint32_t,const std::vector<uint32_t>&,const std::vector<std::string>&>(),
+                DOC(kp, Manager, Manager_2),
                 py::arg("device") = 0,
                 py::arg("family_queue_indices") = std::vector<uint32_t>(),
                 py::arg("desired_extensions") = std::vector<std::string>())
-        .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
+        .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence),
+                py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
         .def("tensor", [np](kp::Manager& self,
                             const py::array_t<float> data,
                             kp::Tensor::TensorTypes tensor_type) {
@@ -149,7 +178,7 @@ PYBIND11_MODULE(kp, m) {
                 const float* ptr                  = (float*) info.ptr;
                 return self.tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type);
             },
-            "Tensor initialisation function with data and tensor type",
+            DOC(kp, Manager, tensor),
             py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
         .def("algorithm", [](kp::Manager& self,
                              const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
@@ -163,8 +192,12 @@ PYBIND11_MODULE(kp, m) {
                     std::vector<uint32_t> spirvVec((uint32_t*)data, (uint32_t*)(data + length));
                     return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts);
                 },
-            "Algorithm initialisation function",
-            py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants());
+            DOC(kp, Manager, algorithm),
+            py::arg("tensors"),
+            py::arg("spirv"),
+            py::arg("workgroup") = kp::Workgroup(),
+            py::arg("spec_consts") = kp::Constants(),
+            py::arg("push_consts") = kp::Constants());
 
 #ifdef VERSION_INFO
     m.attr("__version__") = VERSION_INFO;

From b81896a78062fb53b56ecadadb66936e434879bd Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 16:15:03 +0000
Subject: [PATCH 03/16] Initial iteration of multiple type tensor

---
 src/Tensor.cpp                 | 101 +++----------
 src/include/kompute/Tensor.hpp | 268 +++++++++++++++++++++++++++------
 2 files changed, 243 insertions(+), 126 deletions(-)

diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index f584c07bd..dc254fe83 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -5,17 +5,20 @@ namespace kp {
 
 Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                std::shared_ptr<vk::Device> device,
-               const std::vector<float>& data,
+               void* data,
+               uint32_t elementTotalCount,
+               uint32_t elementMemorySize,
+               const TensorDataTypes& dataType,
                const TensorTypes& tensorType)
 {
     KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
-                 data.size(),
+                 elementTotalCount,
                  tensorType);
 
     this->mPhysicalDevice = physicalDevice;
     this->mDevice = device;
 
-    this->rebuild(data, tensorType);
+    this->rebuild(data, elementTotalCount, elementMemorySize, dataType, tensorType);
 }
 
 Tensor::~Tensor()
@@ -29,11 +32,17 @@ Tensor::~Tensor()
 }
 
 void
-Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
+Tensor::rebuild(void* data,
+                uint32_t elementTotalCount,
+                uint32_t elementMemorySize,
+                const TensorDataTypes& dataType,
+                TensorTypes tensorType)
 {
-    KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
+    KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);
 
-    this->mData = data;
+    this->mSize = elementTotalCount;
+    this->mElementMemorySize = elementMemorySize;
+    this->mDataType = dataType;
     this->mTensorType = tensorType;
 
     if (this->mPrimaryBuffer || this->mPrimaryMemory) {
@@ -43,30 +52,7 @@ Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
     }
 
     this->allocateMemoryCreateGPUResources();
-}
-
-std::vector<float>&
-Tensor::data()
-{
-    return this->mData;
-}
-
-float&
-Tensor::operator[](int index)
-{
-    return this->mData[index];
-}
-
-uint64_t
-Tensor::memorySize()
-{
-    return this->size() * sizeof(float);
-}
-
-uint32_t
-Tensor::size()
-{
-    return static_cast<uint32_t>(this->mData.size());
+    this->rawMapDataIntoHostMemory(data);
 }
 
 Tensor::TensorTypes
@@ -81,15 +67,6 @@ Tensor::isInit()
     return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
 }
 
-void
-Tensor::setData(const std::vector<float>& data)
-{
-    if (data.size() != this->mData.size()) {
-        throw std::runtime_error(
-          "Kompute Tensor Cannot set data of different sizes");
-    }
-    this->mData = data;
-}
 
 void
 Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
@@ -204,55 +181,13 @@ Tensor::constructDescriptorBufferInfo()
 void
 Tensor::mapDataFromHostMemory()
 {
-    KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-    if (this->mTensorType == TensorTypes::eHost) {
-        hostVisibleMemory = this->mPrimaryMemory;
-    } else if (this->mTensorType == TensorTypes::eDevice) {
-        hostVisibleMemory = this->mStagingMemory;
-    } else {
-        KP_LOG_WARN(
-          "Kompute Tensor mapping data not supported on storage tensor");
-        return;
-    }
-
-    vk::DeviceSize bufferSize = this->memorySize();
-    void* mapped = this->mDevice->mapMemory(
-      *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-    vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
-    this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
-    memcpy(this->mData.data(), mapped, bufferSize);
-    this->mDevice->unmapMemory(*hostVisibleMemory);
+    KP_LOG_DEBUG("Kompute Tensor mapDataFromHostMemory - SKIPPING");
 }
 
 void
 Tensor::mapDataIntoHostMemory()
 {
-
-    KP_LOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
-
-    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-    if (this->mTensorType == TensorTypes::eHost) {
-        hostVisibleMemory = this->mPrimaryMemory;
-    } else if (this->mTensorType == TensorTypes::eDevice) {
-        hostVisibleMemory = this->mStagingMemory;
-    } else {
-        KP_LOG_WARN(
-          "Kompute Tensor mapping data not supported on storage tensor");
-        return;
-    }
-
-    vk::DeviceSize bufferSize = this->memorySize();
-
-    void* mapped = this->mDevice->mapMemory(
-      *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-    memcpy(mapped, this->mData.data(), bufferSize);
-    vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-    this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-    this->mDevice->unmapMemory(*hostVisibleMemory);
+    KP_LOG_DEBUG("Kompute Tensor mapDataIntoHostMemory - SKIPPING");
 }
 
 vk::BufferUsageFlags
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 195af44f4..f2583708d 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -27,6 +27,14 @@ class Tensor
         eHost = 1,    ///< Type is host memory, source and destination
         eStorage = 2, ///< Type is Device memory (only)
     };
+    enum class TensorDataTypes
+    {
+        eBool = 0,
+        eInt = 1,
+        eUnsignedInt = 2,
+        eFloat = 3,
+        eDouble = 4,
+    };
 
     /**
      *  Constructor with data provided which would be used to create the
@@ -40,7 +48,10 @@ class Tensor
      */
     Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
-           const std::vector<float>& data,
+           void* data,
+           uint32_t elementTotalCount,
+           uint32_t elementMemorySize,
+           const TensorDataTypes& dataType = TensorDataTypes::eFloat,
            const TensorTypes& tensorType = TensorTypes::eDevice);
 
     /**
@@ -49,6 +60,48 @@ class Tensor
      */
     ~Tensor();
 
+    /**
+     * Returns the size/magnitude of the Tensor, which will be the total number
+     * of elements across all dimensions
+     *
+     * @return Unsigned integer representing the total number of elements
+     */
+    // TODO: move to cpp
+    virtual uint32_t size() {
+        return this->mSize; // element count; mElementMemorySize is the byte width of one element
+    }
+
+    // TODO: move to cpp
+    virtual uint32_t memorySize() {
+        return this->mSize * this->mElementMemorySize;
+    }
+
+    /**
+     * Retrieve the underlying data type of the Tensor
+     *
+     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
+     */
+    virtual TensorDataTypes dataType() {
+        return this->mDataType;
+    }
+
+    /**
+     * Maps data from the Host Visible GPU memory into the data vector. It
+     * requires the Tensor to be of staging type for it to work.
+     */
+    virtual void mapDataFromHostMemory();
+    /**
+     * Maps data from the data vector into the Host Visible GPU memory. It
+     * requires the tensor to be of staging type for it to work.
+     */
+    virtual void mapDataIntoHostMemory();
+
+    // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView
+    // TODO: move to cpp
+    void getRawData(void* data) {
+        this->rawMapDataFromHostMemory(data);
+    }
+
     /**
      * Function to trigger reinitialisation of the tensor buffer and memory with
      * new data as well as new potential device type.
@@ -56,7 +109,10 @@ class Tensor
      * @param data Vector of data to use to initialise vector from
      * @param tensorType The type to use for the tensor
      */
-    void rebuild(const std::vector<float>& data,
+    void rebuild(void* data,
+                 uint32_t elementTotalCount,
+                 uint32_t elementMemorySize,
+                 const TensorDataTypes& dataType = TensorDataTypes::eFloat,
                  TensorTypes tensorType = TensorTypes::eDevice);
 
     /**
@@ -71,32 +127,6 @@ class Tensor
      */
     bool isInit();
 
-    /**
-     * Returns the vector of data currently contained by the Tensor. It is
-     * important to ensure that there is no out-of-sync data with the GPU
-     * memory.
-     *
-     * @return Reference to vector of elements representing the data in the
-     * tensor.
-     */
-    std::vector<float>& data();
-    /**
-     * Overrides the subscript operator to expose the underlying data's
-     * subscript operator which in this case would be its underlying
-     * vector's.
-     *
-     * @param i The index where the element will be returned from.
-     * @return Returns the element in the position requested.
-     */
-    float& operator[](int index);
-    /**
-     * Returns the size/magnitude of the Tensor, which will be the total number
-     * of elements across all dimensions
-     *
-     * @return Unsigned integer representing the total number of elements
-     */
-    uint32_t size();
-
     /**
      * Retrieve the tensor type of the Tensor
      *
@@ -108,7 +138,15 @@ class Tensor
      * Sets / resets the vector data of the tensor. This function does not
      * perform any copies into GPU memory and is only performed on the host.
      */
-    void setData(const std::vector<float>& data);
+    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
+        if (elementTotalCount * elementMemorySize != this->memorySize()) {
+            throw std::runtime_error(
+              "Kompute Tensor Cannot set data of different sizes");
+        }
+        this->mSize = elementTotalCount;
+        this->mElementMemorySize = elementMemorySize;
+        this->rawMapDataIntoHostMemory(data);
+    }
 
     /**
      * Records a copy from the memory of the tensor provided to the current
@@ -172,16 +210,6 @@ class Tensor
      * @return Descriptor buffer info with own buffer
      */
     vk::DescriptorBufferInfo constructDescriptorBufferInfo();
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    void mapDataFromHostMemory();
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    void mapDataIntoHostMemory();
 
   private:
     // -------------- NEVER OWNED RESOURCES
@@ -199,9 +227,10 @@ class Tensor
     bool mFreeStagingMemory = false;
 
     // -------------- ALWAYS OWNED RESOURCES
-    std::vector<float> mData;
-
-    TensorTypes mTensorType = TensorTypes::eDevice;
+    TensorTypes mTensorType;
+    TensorDataTypes mDataType;
+    uint32_t mSize;
+    uint32_t mElementMemorySize;
 
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
@@ -221,7 +250,160 @@ class Tensor
     vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
     vk::BufferUsageFlags getStagingBufferUsageFlags();
     vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
-    uint64_t memorySize();
+
+    void rawMapDataFromHostMemory(void* data) {
+
+        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory;
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory;
+        } else {
+            KP_LOG_WARN(
+              "Kompute Tensor mapping data not supported on storage tensor");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+        void* mapped = this->mDevice->mapMemory(
+          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+        vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
+        memcpy(data, mapped, bufferSize);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
+
+    void rawMapDataIntoHostMemory(void* data) {
+        KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory;
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory;
+        } else {
+            KP_LOG_WARN(
+              "Kompute Tensor mapping data not supported on storage tensor");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+
+        void* mapped = this->mDevice->mapMemory(
+          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+        memcpy(mapped, data, bufferSize);
+        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
 };
 
+// TODO: Limit T to be only float, bool, double, etc
+// Typed tensor that keeps a host-side std::vector<T> alongside the untyped kp::Tensor.
+template <typename T>
+class TensorView: public Tensor
+{
+  public:
+    TensorView(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+           std::shared_ptr<vk::Device> device,
+           const std::vector<T>& data,
+           const TensorTypes& tensorType = TensorTypes::eDevice);
+
+    ~TensorView();
+
+    /** Replaces the host copy with `data` and rebuilds the base tensor from that copy. */
+    void rebuild(const std::vector<T>& data,
+            TensorTypes tensorType = TensorTypes::eDevice) {
+        this->mData = data;
+        // Tensor::rebuild takes a mutable void*, so pass the owned copy rather than the const argument.
+        Tensor::rebuild(this->mData.data(), static_cast<uint32_t>(this->mData.size()), sizeof(T), this->dataType(), tensorType);
+    }
+
+    std::vector<T>& data() {
+        return this->mData;
+    }
+
+    T& operator[](int index) {
+        return this->mData[index];
+    }
+
+    /** Overwrites the host copy with same-sized `data`; throws std::runtime_error on a size mismatch. */
+    void setData(const std::vector<T>& data) {
+        if (data.size() != this->mData.size()) {
+            throw std::runtime_error(
+              "Kompute TensorView Cannot set data of different sizes");
+        }
+        this->mData = data;
+        // setRawData takes exactly (data, element count, element byte size).
+        this->setRawData(this->mData.data(), static_cast<uint32_t>(this->mData.size()), sizeof(T));
+    }
+
+    TensorDataTypes dataType() override;
+
+    uint32_t size() override {
+        return static_cast<uint32_t>(this->mData.size());
+    }
+
+    uint32_t memorySize() override {
+        return static_cast<uint32_t>(this->mData.size() * sizeof(T));
+    }
+
+    /**
+     * Maps data from the Host Visible GPU memory into the data vector. It
+     * requires the Tensor to be of staging type for it to work.
+     */
+    void mapDataFromHostMemory() override {
+        KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data");
+        // Goes through the public getRawData(): the rawMap* helpers sit in Tensor's private section.
+        this->getRawData(this->mData.data());
+    }
+    /**
+     * Maps data from the data vector into the Host Visible GPU memory. It
+     * requires the tensor to be of staging type for it to work.
+     */
+    void mapDataIntoHostMemory() override {
+        KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data");
+        // Goes through the public setRawData() (rawMap* helpers are private to Tensor); sizes are unchanged.
+        this->setRawData(this->mData.data(), static_cast<uint32_t>(this->mData.size()), sizeof(T));
+    }
+
+  private:
+    // -------------- ALWAYS OWNED RESOURCES
+    std::vector<T> mData;
+};
+
+// Maps each supported element type T to its TensorDataTypes tag.
+// These are full specialisations of a member function defined in a header,
+// so they are marked `inline`: a full specialisation is an ordinary function
+// and would otherwise be defined once per translation unit that includes this
+// header.
+template<>
+inline Tensor::TensorDataTypes TensorView<bool>::dataType() {
+    return Tensor::TensorDataTypes::eBool;
+}
+
+template<>
+inline Tensor::TensorDataTypes TensorView<int32_t>::dataType() {
+    return Tensor::TensorDataTypes::eInt;
+}
+
+template<>
+inline Tensor::TensorDataTypes TensorView<uint32_t>::dataType() {
+    return Tensor::TensorDataTypes::eUnsignedInt;
+}
+
+template<>
+inline Tensor::TensorDataTypes TensorView<float>::dataType() {
+    return Tensor::TensorDataTypes::eFloat;
+}
+
+template<>
+inline Tensor::TensorDataTypes TensorView<double>::dataType() {
+    return Tensor::TensorDataTypes::eDouble;
+}
+
 } // End namespace kp

From ad18c2e54698e1496347727ce61d46a8e9562e7b Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 17:25:35 +0000
Subject: [PATCH 04/16] Initial implementation of tensor working compiling

---
 examples/array_multiplication/CMakeLists.txt |   6 +-
 examples/array_multiplication/README.md      |   7 +-
 examples/array_multiplication/src/Main.cpp   |  11 +-
 examples/logistic_regression/CMakeLists.txt  |   8 +-
 examples/logistic_regression/README.md       |   7 +-
 examples/logistic_regression/src/Main.cpp    |  18 +-
 single_include/kompute/Kompute.hpp           | 294 +++++++++++++++----
 src/Manager.cpp                              |  15 -
 src/OpTensorCopy.cpp                         |  17 +-
 src/Tensor.cpp                               |  42 ++-
 src/include/kompute/Manager.hpp              |  19 +-
 src/include/kompute/Tensor.hpp               | 183 ++++++------
 12 files changed, 417 insertions(+), 210 deletions(-)

diff --git a/examples/array_multiplication/CMakeLists.txt b/examples/array_multiplication/CMakeLists.txt
index 0b648382e..bfc4c1c79 100644
--- a/examples/array_multiplication/CMakeLists.txt
+++ b/examples/array_multiplication/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.17.0)
+cmake_minimum_required(VERSION 3.4.1)
 project(kompute_array_mult VERSION 0.1.0)
 
 set(CMAKE_CXX_STANDARD 14)
@@ -23,10 +23,6 @@ endif()
 
 find_package(Vulkan REQUIRED)
 
-if(KOMPUTE_OPT_ENABLE_SPDLOG)
-    find_package(spdlog REQUIRED)
-endif()
-
 add_executable(kompute_array_mult
     src/Main.cpp)
 
diff --git a/examples/array_multiplication/README.md b/examples/array_multiplication/README.md
index 931c7d639..d4082c713 100644
--- a/examples/array_multiplication/README.md
+++ b/examples/array_multiplication/README.md
@@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
 To build you just need to run the cmake command in this folder as follows:
 
 ```
-cmake \
-    -Bbuild
+cmake -Bbuild/ \
+          -DCMAKE_BUILD_TYPE=Debug                   \
+          -DKOMPUTE_OPT_INSTALL=0                    \
+          -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1       \
+          -DKOMPUTE_OPT_ENABLE_SPDLOG=1
 ```
 
 You can pass the following optional parameters based on your desired configuration:
diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp
index acb76898c..dacc67f89 100755
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@@ -7,16 +7,11 @@
 
 int main()
 {
-#if KOMPUTE_ENABLE_SPDLOG
-    spdlog::set_level(
-      static_cast<spdlog::level::level_enum>(SPDLOG_ACTIVE_LEVEL));
-#endif
-
     kp::Manager mgr;
 
-    auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 });
-    auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 });
-    auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 });
+    auto tensorInA = mgr.tensor<float>({ 2.0, 4.0, 6.0 });
+    auto tensorInB = mgr.tensor<float>({ 0.0, 1.0, 2.0 });
+    auto tensorOut = mgr.tensor<float>({ 0.0, 0.0, 0.0 });
 
     std::string shader(R"(
         // The version to use 
diff --git a/examples/logistic_regression/CMakeLists.txt b/examples/logistic_regression/CMakeLists.txt
index f918bbf21..8c8e0eb8f 100644
--- a/examples/logistic_regression/CMakeLists.txt
+++ b/examples/logistic_regression/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.17.0)
+cmake_minimum_required(VERSION 3.4.1)
 project(kompute_linear_reg VERSION 0.1.0)
 
 set(CMAKE_CXX_STANDARD 14)
@@ -23,10 +23,6 @@ endif()
 
 find_package(Vulkan REQUIRED)
 
-if(KOMPUTE_OPT_ENABLE_SPDLOG)
-    find_package(spdlog REQUIRED)
-endif()
-
 add_executable(kompute_linear_reg
     src/Main.cpp)
 
@@ -39,7 +35,7 @@ include_directories(
         ../../single_include/)
 
 if(KOMPUTE_OPT_ENABLE_SPDLOG)
-    target_link_libraries(kompute_array_mult
+    target_link_libraries(kompute_linear_reg
         spdlog::spdlog)
 endif()
 
diff --git a/examples/logistic_regression/README.md b/examples/logistic_regression/README.md
index 0de7ee30a..342bbfca1 100644
--- a/examples/logistic_regression/README.md
+++ b/examples/logistic_regression/README.md
@@ -15,8 +15,11 @@ This project has the option to either import the Kompute dependency relative to
 To build you just need to run the cmake command in this folder as follows:
 
 ```
-cmake \
-    -Bbuild
+cmake -Bbuild/ \
+          -DCMAKE_BUILD_TYPE=Debug                   \
+          -DKOMPUTE_OPT_INSTALL=0                    \
+          -DKOMPUTE_OPT_REPO_SUBMODULE_BUILD=1       \
+          -DKOMPUTE_OPT_ENABLE_SPDLOG=1
 ```
 
 You can pass the following optional parameters based on your desired configuration:
diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp
index c435575e2..3b6ec11e1 100755
--- a/examples/logistic_regression/src/Main.cpp
+++ b/examples/logistic_regression/src/Main.cpp
@@ -17,19 +17,19 @@ int main()
 
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
-    std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
+    auto xI = mgr.tensor<float>({ 0, 1, 1, 1, 1 });
+    auto xJ = mgr.tensor<float>({ 0, 0, 0, 1, 1 });
 
-    std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
+    auto y = mgr.tensor<float>({ 0, 0, 0, 1, 1 });
 
-    std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
-    std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
-    std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
+    auto wIn = mgr.tensor<float>({ 0.001, 0.001 });
+    auto wOutI = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
+    auto wOutJ = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
 
-    std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
-    std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+    auto bIn = mgr.tensor<float>({ 0 });
+    auto bOut = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
 
-    std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+    auto lOut = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
                                                         wIn, wOutI, wOutJ,
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 593390dbe..41e9434f8 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -762,7 +762,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compileSources(
+    static std::vector<uint32_t> compile_sources(
       const std::vector<std::string>& sources,
       const std::vector<std::string>& files = {},
       const std::string& entryPoint = "main",
@@ -783,7 +783,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compileSource(
+    static std::vector<uint32_t> compile_source(
       const std::string& source,
       const std::string& entryPoint = "main",
       std::vector<std::pair<std::string, std::string>> definitions = {},
@@ -818,6 +818,14 @@ class Tensor
         eHost = 1,    ///< Type is host memory, source and destination
         eStorage = 2, ///< Type is Device memory (only)
     };
+    enum class TensorDataTypes
+    {
+        eBool = 0,
+        eInt = 1,
+        eUnsignedInt = 2,
+        eFloat = 3,
+        eDouble = 4,
+    };
 
     /**
      *  Constructor with data provided which would be used to create the
@@ -831,14 +839,78 @@ class Tensor
      */
     Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
-           const std::vector<float>& data,
+           void* data,
+           uint32_t elementTotalCount,
+           uint32_t elementMemorySize,
+           const TensorDataTypes& dataType,
            const TensorTypes& tensorType = TensorTypes::eDevice);
 
     /**
      * Destructor which is in charge of freeing vulkan resources unless they
      * have been provided externally.
      */
-    ~Tensor();
+    virtual ~Tensor();
+
+    /**
+     * Returns the size/magnitude of the Tensor, which will be the total number
+     * of elements across all dimensions
+     *
+     * @return Unsigned integer representing the total number of elements
+     */
+    // TODO: move to cpp
+    virtual uint32_t size() {
+        return this->mSize;
+    }
+
+    // TODO: move to cpp
+    virtual uint32_t dataTypeMemorySize() {
+        return this->mDataTypeMemorySize;
+    }
+
+    // TODO: move to cpp
+    virtual uint32_t memorySize() {
+        return this->mSize * this->mDataTypeMemorySize;
+    }
+
+    /**
+     * Retrieve the underlying data type of the Tensor
+     *
+     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
+     */
+    virtual TensorDataTypes dataType() {
+        return this->mDataType;
+    }
+
+    /**
+     * Maps data from the Host Visible GPU memory into the data vector. It
+     * requires the Tensor to be of staging type for it to work.
+     */
+    virtual void mapDataFromHostMemory();
+    /**
+     * Maps data from the data vector into the Host Visible GPU memory. It
+     * requires the tensor to be of staging type for it to work.
+     */
+    virtual void mapDataIntoHostMemory();
+
+    // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView
+    // TODO: move to cpp
+    virtual void getRawData(void* data) {
+        this->rawMapDataFromHostMemory(data);
+    }
+
+    /**
+     * Sets / resets the vector data of the tensor. This function does not
+     * perform any copies into GPU memory and is only performed on the host.
+     */
+    virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
+        if (elementTotalCount * elementMemorySize != this->memorySize()) {
+            throw std::runtime_error(
+              "Kompute Tensor Cannot set data of different sizes");
+        }
+        this->mSize = elementTotalCount;
+        this->mDataTypeMemorySize = elementMemorySize;
+        this->rawMapDataIntoHostMemory(data);
+    }
 
     /**
      * Function to trigger reinitialisation of the tensor buffer and memory with
@@ -847,8 +919,9 @@ class Tensor
      * @param data Vector of data to use to initialise vector from
      * @param tensorType The type to use for the tensor
      */
-    void rebuild(const std::vector<float>& data,
-                 TensorTypes tensorType = TensorTypes::eDevice);
+    void rebuild(void* data,
+                 uint32_t elementTotalCount,
+                 uint32_t elementMemorySize);
 
     /**
      * Destroys and frees the GPU resources which include the buffer and memory.
@@ -862,32 +935,6 @@ class Tensor
      */
     bool isInit();
 
-    /**
-     * Returns the vector of data currently contained by the Tensor. It is
-     * important to ensure that there is no out-of-sync data with the GPU
-     * memory.
-     *
-     * @return Reference to vector of elements representing the data in the
-     * tensor.
-     */
-    std::vector<float>& data();
-    /**
-     * Overrides the subscript operator to expose the underlying data's
-     * subscript operator which in this case would be its underlying
-     * vector's.
-     *
-     * @param i The index where the element will be returned from.
-     * @return Returns the element in the position requested.
-     */
-    float& operator[](int index);
-    /**
-     * Returns the size/magnitude of the Tensor, which will be the total number
-     * of elements across all dimensions
-     *
-     * @return Unsigned integer representing the total number of elements
-     */
-    uint32_t size();
-
     /**
      * Retrieve the tensor type of the Tensor
      *
@@ -895,12 +942,6 @@ class Tensor
      */
     TensorTypes tensorType();
 
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    void setData(const std::vector<float>& data);
-
     /**
      * Records a copy from the memory of the tensor provided to the current
      * thensor. This is intended to pass memory into a processing, to perform
@@ -963,17 +1004,57 @@ class Tensor
      * @return Descriptor buffer info with own buffer
      */
     vk::DescriptorBufferInfo constructDescriptorBufferInfo();
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    void mapDataFromHostMemory();
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    void mapDataIntoHostMemory();
 
+  protected:
+    void rawMapDataFromHostMemory(void* data) {
+
+        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory;
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory;
+        } else {
+            KP_LOG_WARN(
+              "Kompute Tensor mapping data not supported on storage tensor");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+        void* mapped = this->mDevice->mapMemory(
+          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+        vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
+        memcpy(data, mapped, bufferSize);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
+
+    void rawMapDataIntoHostMemory(void* data) {
+        KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory;
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory;
+        } else {
+            KP_LOG_WARN(
+              "Kompute Tensor mapping data not supported on storage tensor");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+
+        void* mapped = this->mDevice->mapMemory(
+          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+        memcpy(mapped, data, bufferSize);
+        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
   private:
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
@@ -990,9 +1071,10 @@ class Tensor
     bool mFreeStagingMemory = false;
 
     // -------------- ALWAYS OWNED RESOURCES
-    std::vector<float> mData;
-
-    TensorTypes mTensorType = TensorTypes::eDevice;
+    TensorTypes mTensorType;
+    TensorDataTypes mDataType;
+    uint32_t mSize;
+    uint32_t mDataTypeMemorySize;
 
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
@@ -1012,9 +1094,98 @@ class Tensor
     vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
     vk::BufferUsageFlags getStagingBufferUsageFlags();
     vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
-    uint64_t memorySize();
+
 };
 
+// TODO: Limit T to be only float, bool, double, etc
+template <typename T>
+class TensorView: public Tensor
+{
+  public:
+    TensorView(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+           std::shared_ptr<vk::Device> device,
+           const std::vector<T>& data,
+           const TensorTypes& tensorType = TensorTypes::eDevice)
+        : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
+    {
+
+    }
+
+    ~TensorView() {
+
+    }
+
+    void rebuild(const std::vector<T>& data,
+            TensorTypes tensorType = TensorTypes::eDevice) {
+
+        this->mData = data;
+        Tensor::rebuild(data.data(), data.size(), sizeof(T));
+    }
+
+    std::vector<T>& data() {
+        return this->mData;
+    }
+
+    T& operator[](int index) {
+        return this->mData[index];
+    }
+
+    void setData(const std::vector<T>& data) {
+
+        if (data.size() != this->mData.size()) {
+            throw std::runtime_error(
+              "Kompute TensorView Cannot set data of different sizes");
+        }
+
+        this->mData = data;
+
+        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
+    }
+
+    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
+    {
+        assert(elementMemorySize == sizeof(T));
+
+        this->mData = { (T*)data, ((T*)data) + elementTotalCount };
+        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
+    }
+
+    TensorDataTypes dataType() override;
+
+    uint32_t size() override {
+        return this->mData.size();
+    }
+
+    uint32_t memorySize() override {
+        return this->mData.size() * sizeof(T);
+    }
+
+    /**
+     * Maps data from the Host Visible GPU memory into the data vector. It
+     * requires the Tensor to be of staging type for it to work.
+     */
+    void mapDataFromHostMemory() override {
+        KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data");
+
+        this->rawMapDataFromHostMemory(this->mData.data());
+    }
+    /**
+     * Maps data from the data vector into the Host Visible GPU memory. It
+     * requires the tensor to be of staging type for it to work.
+     */
+    void mapDataIntoHostMemory() override {
+        KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data");
+
+        this->rawMapDataIntoHostMemory(this->mData.data());
+    }
+
+  private:
+    // -------------- ALWAYS OWNED RESOURCES
+    std::vector<T> mData;
+
+};
+
+
 } // End namespace kp
 
 namespace kp {
@@ -1883,9 +2054,22 @@ class Manager
      * @param tensorType The type of tensor to initialize
      * @returns Shared pointer with initialised tensor
      */
-    std::shared_ptr<Tensor> tensor(
-      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
+    template <typename T>
+    std::shared_ptr<TensorView<T>> tensor(
+      const std::vector<T>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
+
+        std::shared_ptr<TensorView<T>> tensor{ new kp::TensorView<T>(
+          this->mPhysicalDevice, this->mDevice, data, tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
 
     /**
      * Create a managed algorithm that will be destroyed by this manager
diff --git a/src/Manager.cpp b/src/Manager.cpp
index e3bdbb2d9..5d6bf4cd4 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -395,21 +395,6 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
     KP_LOG_DEBUG("Kompute Manager compute queue obtained");
 }
 
-std::shared_ptr<Tensor>
-Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
-{
-    KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
-
-    std::shared_ptr<Tensor> tensor{ new kp::Tensor(
-      this->mPhysicalDevice, this->mDevice, data, tensorType) };
-
-    if (this->mManageResources) {
-        this->mManagedTensors.push_back(tensor);
-    }
-
-    return tensor;
-}
-
 std::shared_ptr<Algorithm>
 Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
                    const std::vector<uint32_t>& spirv,
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index 6950a4cd2..16e3017e9 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -13,6 +13,14 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
         throw std::runtime_error(
           "Kompute OpTensorCopy called with less than 2 tensor");
     }
+
+    kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
+    for (const std::shared_ptr<Tensor>& tensor : tensors) {
+        if (tensor->dataType() != dataType) {
+            throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
+                        dataType, tensor->dataType()));
+        }
+    }
 }
 
 OpTensorCopy::~OpTensorCopy()
@@ -43,9 +51,16 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
 {
     KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
 
+    // TODO: Simplify with a copyRawData
+    uint32_t size = this->mTensors[0]->size();
+    uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
+    uint32_t memSize = size * dataTypeMemSize;
+    void* data = operator new(memSize);
+    this->mTensors[0]->getRawData(data);
+
     // Copy the data from the first tensor into all the tensors
     for (size_t i = 1; i < this->mTensors.size(); i++) {
-        this->mTensors[i]->setData(this->mTensors[0]->data());
+        this->mTensors[i]->setRawData(data, size, dataTypeMemSize);
     }
 }
 
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index dc254fe83..4f188d5af 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -17,8 +17,10 @@ Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
 
     this->mPhysicalDevice = physicalDevice;
     this->mDevice = device;
+    this->mDataType = dataType;
+    this->mTensorType = tensorType;
 
-    this->rebuild(data, elementTotalCount, elementMemorySize, dataType, tensorType);
+    this->rebuild(data, elementTotalCount, elementMemorySize);
 }
 
 Tensor::~Tensor()
@@ -34,16 +36,12 @@ Tensor::~Tensor()
 void
 Tensor::rebuild(void* data,
                 uint32_t elementTotalCount,
-                uint32_t elementMemorySize,
-                const TensorDataTypes& dataType,
-                TensorTypes tensorType)
+                uint32_t elementMemorySize)
 {
     KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);
 
     this->mSize = elementTotalCount;
-    this->mElementMemorySize = elementMemorySize;
-    this->mDataType = dataType;
-    this->mTensorType = tensorType;
+    this->mDataTypeMemorySize = elementMemorySize;
 
     if (this->mPrimaryBuffer || this->mPrimaryMemory) {
         KP_LOG_DEBUG(
@@ -439,4 +437,34 @@ Tensor::destroy()
     KP_LOG_DEBUG("Kompute Tensor successful destroy()");
 }
 
+template<>
+Tensor::TensorDataTypes
+TensorView<bool>::dataType() {
+    return Tensor::TensorDataTypes::eBool;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorView<int32_t>::dataType() {
+    return Tensor::TensorDataTypes::eInt;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorView<uint32_t>::dataType() {
+    return Tensor::TensorDataTypes::eUnsignedInt;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorView<float>::dataType() {
+    return Tensor::TensorDataTypes::eFloat;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorView<double>::dataType() {
+    return Tensor::TensorDataTypes::eDouble;
+}
+
 }
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index d9c6ddf3e..d27bccacc 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -74,9 +74,22 @@ class Manager
      * @param tensorType The type of tensor to initialize
      * @returns Shared pointer with initialised tensor
      */
-    std::shared_ptr<Tensor> tensor(
-      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
+    template <typename T>
+    std::shared_ptr<TensorView<T>> tensor(
+      const std::vector<T>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
+
+        std::shared_ptr<TensorView<T>> tensor{ new kp::TensorView<T>(
+          this->mPhysicalDevice, this->mDevice, data, tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
 
     /**
      * Create a managed algorithm that will be destroyed by this manager
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index f2583708d..03e52d43d 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -51,14 +51,14 @@ class Tensor
            void* data,
            uint32_t elementTotalCount,
            uint32_t elementMemorySize,
-           const TensorDataTypes& dataType = TensorDataTypes::eFloat,
+           const TensorDataTypes& dataType,
            const TensorTypes& tensorType = TensorTypes::eDevice);
 
     /**
      * Destructor which is in charge of freeing vulkan resources unless they
      * have been provided externally.
      */
-    ~Tensor();
+    virtual ~Tensor();
 
     /**
      * Returns the size/magnitude of the Tensor, which will be the total number
@@ -68,12 +68,17 @@ class Tensor
      */
     // TODO: move to cpp
     virtual uint32_t size() {
-        return this->mElementMemorySize;
+        return this->mSize;
+    }
+
+    // TODO: move to cpp
+    virtual uint32_t dataTypeMemorySize() {
+        return this->mDataTypeMemorySize;
     }
 
     // TODO: move to cpp
     virtual uint32_t memorySize() {
-        return this->mSize * this->mElementMemorySize;
+        return this->mSize * this->mDataTypeMemorySize;
     }
 
     /**
@@ -98,10 +103,24 @@ class Tensor
 
     // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView
     // TODO: move to cpp
-    void getRawData(void* data) {
+    virtual void getRawData(void* data) {
         this->rawMapDataFromHostMemory(data);
     }
 
+    /**
+     * Sets / resets the vector data of the tensor. This function does not
+     * perform any copies into GPU memory and is only performed on the host.
+     */
+    virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
+        if (elementTotalCount * elementMemorySize != this->memorySize()) {
+            throw std::runtime_error(
+              "Kompute Tensor Cannot set data of different sizes");
+        }
+        this->mSize = elementTotalCount;
+        this->mDataTypeMemorySize = elementMemorySize;
+        this->rawMapDataIntoHostMemory(data);
+    }
+
     /**
      * Function to trigger reinitialisation of the tensor buffer and memory with
      * new data as well as new potential device type.
@@ -111,9 +130,7 @@ class Tensor
      */
     void rebuild(void* data,
                  uint32_t elementTotalCount,
-                 uint32_t elementMemorySize,
-                 const TensorDataTypes& dataType = TensorDataTypes::eFloat,
-                 TensorTypes tensorType = TensorTypes::eDevice);
+                 uint32_t elementMemorySize);
 
     /**
      * Destroys and frees the GPU resources which include the buffer and memory.
@@ -134,19 +151,6 @@ class Tensor
      */
     TensorTypes tensorType();
 
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
-        if (elementTotalCount * elementMemorySize != this->memorySize()) {
-            throw std::runtime_error(
-              "Kompute Tensor Cannot set data of different sizes");
-        }
-        this->mSize = elementTotalCount;
-        this->mElementMemorySize = elementMemorySize;
-        this->rawMapDataIntoHostMemory(data);
-    }
 
     /**
      * Records a copy from the memory of the tensor provided to the current
@@ -211,46 +215,7 @@ class Tensor
      */
     vk::DescriptorBufferInfo constructDescriptorBufferInfo();
 
-  private:
-    // -------------- NEVER OWNED RESOURCES
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
-    std::shared_ptr<vk::Device> mDevice;
-
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::Buffer> mPrimaryBuffer;
-    bool mFreePrimaryBuffer = false;
-    std::shared_ptr<vk::Buffer> mStagingBuffer;
-    bool mFreeStagingBuffer = false;
-    std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
-    bool mFreePrimaryMemory = false;
-    std::shared_ptr<vk::DeviceMemory> mStagingMemory;
-    bool mFreeStagingMemory = false;
-
-    // -------------- ALWAYS OWNED RESOURCES
-    TensorTypes mTensorType;
-    TensorDataTypes mDataType;
-    uint32_t mSize;
-    uint32_t mElementMemorySize;
-
-    void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
-    void createBuffer(std::shared_ptr<vk::Buffer> buffer,
-                      vk::BufferUsageFlags bufferUsageFlags);
-    void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
-                            std::shared_ptr<vk::DeviceMemory> memory,
-                            vk::MemoryPropertyFlags memoryPropertyFlags);
-    void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
-                          std::shared_ptr<vk::Buffer> bufferFrom,
-                          std::shared_ptr<vk::Buffer> bufferTo,
-                          vk::DeviceSize bufferSize,
-                          vk::BufferCopy copyRegion,
-                          bool createBarrier);
-
-    // Private util functions
-    vk::BufferUsageFlags getPrimaryBufferUsageFlags();
-    vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
-    vk::BufferUsageFlags getStagingBufferUsageFlags();
-    vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
-
+  protected:
     void rawMapDataFromHostMemory(void* data) {
 
         KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
@@ -300,6 +265,46 @@ class Tensor
         this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
         this->mDevice->unmapMemory(*hostVisibleMemory);
     }
+  private:
+    // -------------- NEVER OWNED RESOURCES
+    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
+    std::shared_ptr<vk::Device> mDevice;
+
+    // -------------- OPTIONALLY OWNED RESOURCES
+    std::shared_ptr<vk::Buffer> mPrimaryBuffer;
+    bool mFreePrimaryBuffer = false;
+    std::shared_ptr<vk::Buffer> mStagingBuffer;
+    bool mFreeStagingBuffer = false;
+    std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
+    bool mFreePrimaryMemory = false;
+    std::shared_ptr<vk::DeviceMemory> mStagingMemory;
+    bool mFreeStagingMemory = false;
+
+    // -------------- ALWAYS OWNED RESOURCES
+    TensorTypes mTensorType;
+    TensorDataTypes mDataType;
+    uint32_t mSize;
+    uint32_t mDataTypeMemorySize;
+
+    void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
+    void createBuffer(std::shared_ptr<vk::Buffer> buffer,
+                      vk::BufferUsageFlags bufferUsageFlags);
+    void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
+                            std::shared_ptr<vk::DeviceMemory> memory,
+                            vk::MemoryPropertyFlags memoryPropertyFlags);
+    void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
+                          std::shared_ptr<vk::Buffer> bufferFrom,
+                          std::shared_ptr<vk::Buffer> bufferTo,
+                          vk::DeviceSize bufferSize,
+                          vk::BufferCopy copyRegion,
+                          bool createBarrier);
+
+    // Private util functions
+    vk::BufferUsageFlags getPrimaryBufferUsageFlags();
+    vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
+    vk::BufferUsageFlags getStagingBufferUsageFlags();
+    vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
+
 };
 
 // TODO: Limit T to be only float, bool, double, etc
@@ -310,15 +315,21 @@ class TensorView: public Tensor
     TensorView(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
-           const TensorTypes& tensorType = TensorTypes::eDevice);
+           const TensorTypes& tensorType = TensorTypes::eDevice)
+        : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
+    {
 
-    ~TensorView();
+    }
+
+    ~TensorView() {
+
+    }
 
     void rebuild(const std::vector<T>& data,
             TensorTypes tensorType = TensorTypes::eDevice) {
 
         this->mData = data;
-        Tensor::rebuild(data.data(), data.size(), sizeof(T), this->dataType(), tensorType);
+        Tensor::rebuild(data.data(), data.size(), sizeof(T));
     }
 
     std::vector<T>& data() {
@@ -338,17 +349,25 @@ class TensorView: public Tensor
 
         this->mData = data;
 
-        this->setRawData(this->mData.data(), this->mData.size(), sizeof(T), this->dataType());
+        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
+    }
+
+    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
+    {
+        assert(elementMemorySize == sizeof(T));
+
+        this->mData = { (T*)data, ((T*)data) + elementTotalCount };
+        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
     }
 
     TensorDataTypes dataType() override;
 
     uint32_t size() override {
-        return this->mData->size();
+        return this->mData.size();
     }
 
     uint32_t memorySize() override {
-        return this->mData->size() * sizeof(T);
+        return this->mData.size() * sizeof(T);
     }
 
     /**
@@ -376,34 +395,4 @@ class TensorView: public Tensor
 
 };
 
-template<>
-Tensor::TensorDataTypes
-TensorView<bool>::dataType() {
-    return Tensor::TensorDataTypes::eBool;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<int32_t>::dataType() {
-    return Tensor::TensorDataTypes::eInt;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<uint32_t>::dataType() {
-    return Tensor::TensorDataTypes::eUnsignedInt;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<float>::dataType() {
-    return Tensor::TensorDataTypes::eFloat;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<double>::dataType() {
-    return Tensor::TensorDataTypes::eDouble;
-}
-
 } // End namespace kp

From 956883e0cdee22541b284892ff3f53efcf562cbf Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 17:44:17 +0000
Subject: [PATCH 05/16] Working iteration of kompute tensor with multiple
 types

---
 examples/array_multiplication/src/Main.cpp |  8 +++++++-
 single_include/kompute/Kompute.hpp         | 15 +++++++++++----
 src/Tensor.cpp                             |  1 +
 src/include/kompute/Tensor.hpp             | 14 +++++++++++---
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp
index dacc67f89..812a5039f 100755
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@@ -7,6 +7,11 @@
 
 int main()
 {
+#if KOMPUTE_ENABLE_SPDLOG
+    spdlog::set_level(
+      static_cast<spdlog::level::level_enum>(SPDLOG_ACTIVE_LEVEL));
+#endif
+
     kp::Manager mgr;
 
     auto tensorInA = mgr.tensor<float>({ 2.0, 4.0, 6.0 });
@@ -39,7 +44,8 @@ int main()
     mgr.sequence()
         ->record<kp::OpTensorSyncDevice>(params)
         ->record<kp::OpAlgoDispatch>(algo)
-        ->record<kp::OpTensorSyncLocal>(params);
+        ->record<kp::OpTensorSyncLocal>(params)
+        ->eval();
 
     // prints "Output {  0  4  12  }"
     std::cout<< "Output: {  ";
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 41e9434f8..989f58c20 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1108,16 +1108,17 @@ class TensorView: public Tensor
            const TensorTypes& tensorType = TensorTypes::eDevice)
         : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
     {
-
+        KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
+        this->mData = data;
     }
 
     ~TensorView() {
-
+        KP_LOG_DEBUG("Kompute TensorView destructor");
     }
 
     void rebuild(const std::vector<T>& data,
             TensorTypes tensorType = TensorTypes::eDevice) {
-
+        KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size());
         this->mData = data;
         Tensor::rebuild(data.data(), data.size(), sizeof(T));
     }
@@ -1131,6 +1132,7 @@ class TensorView: public Tensor
     }
 
     void setData(const std::vector<T>& data) {
+        KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
 
         if (data.size() != this->mData.size()) {
             throw std::runtime_error(
@@ -1144,6 +1146,8 @@ class TensorView: public Tensor
 
     void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
     {
+        KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount);
+
         assert(elementMemorySize == sizeof(T));
 
         this->mData = { (T*)data, ((T*)data) + elementTotalCount };
@@ -1153,10 +1157,14 @@ class TensorView: public Tensor
     TensorDataTypes dataType() override;
 
     uint32_t size() override {
+        KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size());
+
         return this->mData.size();
     }
 
     uint32_t memorySize() override {
+        KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T));
+
         return this->mData.size() * sizeof(T);
     }
 
@@ -1185,7 +1193,6 @@ class TensorView: public Tensor
 
 };
 
-
 } // End namespace kp
 
 namespace kp {
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 4f188d5af..d3225987e 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -170,6 +170,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
 vk::DescriptorBufferInfo
 Tensor::constructDescriptorBufferInfo()
 {
+    KP_LOG_WARN("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
     vk::DeviceSize bufferSize = this->memorySize();
     return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
                                     0, // offset
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 03e52d43d..6af4682d6 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -318,16 +318,17 @@ class TensorView: public Tensor
            const TensorTypes& tensorType = TensorTypes::eDevice)
         : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
     {
-
+        KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
+        this->mData = data;
     }
 
     ~TensorView() {
-
+        KP_LOG_DEBUG("Kompute TensorView destructor");
     }
 
     void rebuild(const std::vector<T>& data,
             TensorTypes tensorType = TensorTypes::eDevice) {
-
+        KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size());
         this->mData = data;
         Tensor::rebuild(data.data(), data.size(), sizeof(T));
     }
@@ -341,6 +342,7 @@ class TensorView: public Tensor
     }
 
     void setData(const std::vector<T>& data) {
+        KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
 
         if (data.size() != this->mData.size()) {
             throw std::runtime_error(
@@ -354,6 +356,8 @@ class TensorView: public Tensor
 
     void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
     {
+        KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount);
+
         assert(elementMemorySize == sizeof(T));
 
         this->mData = { (T*)data, ((T*)data) + elementTotalCount };
@@ -363,10 +367,14 @@ class TensorView: public Tensor
     TensorDataTypes dataType() override;
 
     uint32_t size() override {
+        KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size());
+
         return this->mData.size();
     }
 
     uint32_t memorySize() override {
+        KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T));
+
         return this->mData.size() * sizeof(T);
     }
 

From cf7d46cd23a0e76cbc181eb58dbb059a73f22ee2 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 6 Mar 2021 19:42:41 +0000
Subject: [PATCH 06/16] Initial simplification of interface implementation

---
 single_include/kompute/Kompute.hpp | 215 ++++++++--------------------
 src/OpTensorCopy.cpp               |  11 +-
 src/OpTensorSyncDevice.cpp         |   6 -
 src/OpTensorSyncLocal.cpp          |   5 -
 src/Tensor.cpp                     |  49 +------
 src/include/kompute/Tensor.hpp     | 216 ++++++++---------------------
 6 files changed, 135 insertions(+), 367 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 989f58c20..496e6f198 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -851,67 +851,6 @@ class Tensor
      */
     virtual ~Tensor();
 
-    /**
-     * Returns the size/magnitude of the Tensor, which will be the total number
-     * of elements across all dimensions
-     *
-     * @return Unsigned integer representing the total number of elements
-     */
-    // TODO: move to cpp
-    virtual uint32_t size() {
-        return this->mSize;
-    }
-
-    // TODO: move to cpp
-    virtual uint32_t dataTypeMemorySize() {
-        return this->mDataTypeMemorySize;
-    }
-
-    // TODO: move to cpp
-    virtual uint32_t memorySize() {
-        return this->mSize * this->mDataTypeMemorySize;
-    }
-
-    /**
-     * Retrieve the underlying data type of the Tensor
-     *
-     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
-     */
-    virtual TensorDataTypes dataType() {
-        return this->mDataType;
-    }
-
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    virtual void mapDataFromHostMemory();
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    virtual void mapDataIntoHostMemory();
-
-    // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView
-    // TODO: move to cpp
-    virtual void getRawData(void* data) {
-        this->rawMapDataFromHostMemory(data);
-    }
-
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
-        if (elementTotalCount * elementMemorySize != this->memorySize()) {
-            throw std::runtime_error(
-              "Kompute Tensor Cannot set data of different sizes");
-        }
-        this->mSize = elementTotalCount;
-        this->mDataTypeMemorySize = elementMemorySize;
-        this->rawMapDataIntoHostMemory(data);
-    }
-
     /**
      * Function to trigger reinitialisation of the tensor buffer and memory with
      * new data as well as new potential device type.
@@ -1005,8 +944,53 @@ class Tensor
      */
     vk::DescriptorBufferInfo constructDescriptorBufferInfo();
 
-  protected:
-    void rawMapDataFromHostMemory(void* data) {
+    /**
+     * Returns the size/magnitude of the Tensor, which will be the total number
+     * of elements across all dimensions
+     *
+     * @return Unsigned integer representing the total number of elements
+     */
+    // TODO: move to cpp
+    uint32_t size() {
+        return this->mSize;
+    }
+
+    // TODO: move to cpp
+    uint32_t dataTypeMemorySize() {
+        return this->mDataTypeMemorySize;
+    }
+
+    // TODO: move to cpp
+    uint32_t memorySize() {
+        return this->mSize * this->mDataTypeMemorySize;
+    }
+
+    /**
+     * Retrieve the underlying data type of the Tensor
+     *
+     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
+     */
+    TensorDataTypes dataType() {
+        return this->mDataType;
+    }
+
+    // TODO: move to cpp
+    const void* getRawData() {
+        return this->mRawData;
+    }
+
+    /**
+     * Sets / resets the vector data of the tensor. This function does not
+     * perform any copies into GPU memory and is only performed on the host.
+     */
+    void setRawData(const void* data) 
+    {
+        // Copy data 
+        memcpy(this->mRawData, data, this->memorySize());
+    }
+
+  private:
+    void rawMapData() {
 
         KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
 
@@ -1023,39 +1007,12 @@ class Tensor
         }
 
         vk::DeviceSize bufferSize = this->memorySize();
-        void* mapped = this->mDevice->mapMemory(
+        // Given we request coherent host memory we don't need to invalidate / flush
+        this->mRawData = this->mDevice->mapMemory(
           *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
         vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
-        memcpy(data, mapped, bufferSize);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
     }
 
-    void rawMapDataIntoHostMemory(void* data) {
-        KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-
-        void* mapped = this->mDevice->mapMemory(
-          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-        memcpy(mapped, data, bufferSize);
-        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
-    }
-  private:
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;
@@ -1075,6 +1032,7 @@ class Tensor
     TensorDataTypes mDataType;
     uint32_t mSize;
     uint32_t mDataTypeMemorySize;
+    void* mRawData;
 
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
@@ -1106,91 +1064,40 @@ class TensorView: public Tensor
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
            const TensorTypes& tensorType = TensorTypes::eDevice)
-        : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
+        : Tensor(physicalDevice,
+                 device,
+                 (void*)data.data(),
+                 data.size(),
+                 sizeof(T),
+                 this->dataType())
     {
         KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
-        this->mData = data;
     }
 
     ~TensorView() {
         KP_LOG_DEBUG("Kompute TensorView destructor");
     }
 
-    void rebuild(const std::vector<T>& data,
-            TensorTypes tensorType = TensorTypes::eDevice) {
-        KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size());
-        this->mData = data;
-        Tensor::rebuild(data.data(), data.size(), sizeof(T));
-    }
-
-    std::vector<T>& data() {
-        return this->mData;
+    std::vector<T> data() {
+        return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() };
     }
 
     T& operator[](int index) {
-        return this->mData[index];
+        return ((T*)this->mRawData)[index];
     }
 
     void setData(const std::vector<T>& data) {
+
         KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
 
-        if (data.size() != this->mData.size()) {
+        if (data.size() != this->mSize) {
             throw std::runtime_error(
               "Kompute TensorView Cannot set data of different sizes");
         }
 
-        this->mData = data;
-
         Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
     }
 
-    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
-    {
-        KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount);
-
-        assert(elementMemorySize == sizeof(T));
-
-        this->mData = { (T*)data, ((T*)data) + elementTotalCount };
-        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
-    }
-
-    TensorDataTypes dataType() override;
-
-    uint32_t size() override {
-        KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size());
-
-        return this->mData.size();
-    }
-
-    uint32_t memorySize() override {
-        KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T));
-
-        return this->mData.size() * sizeof(T);
-    }
-
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    void mapDataFromHostMemory() override {
-        KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data");
-
-        this->rawMapDataFromHostMemory(this->mData.data());
-    }
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    void mapDataIntoHostMemory() override {
-        KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data");
-
-        this->rawMapDataIntoHostMemory(this->mData.data());
-    }
-
-  private:
-    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<T> mData;
-
 };
 
 } // End namespace kp
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index 16e3017e9..ce53455a3 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -15,11 +15,17 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
     }
 
     kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
+    uint32_t size = this->mTensors[0]->size();
     for (const std::shared_ptr<Tensor>& tensor : tensors) {
         if (tensor->dataType() != dataType) {
             throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
                         dataType, tensor->dataType()));
         }
+        if (tensor->size() != size) {
+            throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}",
+                        size, tensor->size()));
+
+        }
     }
 }
 
@@ -55,12 +61,11 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
     uint32_t size = this->mTensors[0]->size();
     uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
     uint32_t memSize = size * dataTypeMemSize;
-    void* data = operator new(memSize);
-    this->mTensors[0]->getRawData(data);
+    const void* data = this->mTensors[0]->getRawData();
 
     // Copy the data from the first tensor into all the tensors
     for (size_t i = 1; i < this->mTensors.size(); i++) {
-        this->mTensors[i]->setRawData(data, size, dataTypeMemSize);
+        this->mTensors[i]->setRawData(data);
     }
 }
 
diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp
index 85cefde77..4dbfaec83 100644
--- a/src/OpTensorSyncDevice.cpp
+++ b/src/OpTensorSyncDevice.cpp
@@ -41,12 +41,6 @@ OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
 {
     KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
 
-    // Performing sync of data as eval can be called multiple times with same op
-    for (size_t i = 0; i < this->mTensors.size(); i++) {
-        if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
-            this->mTensors[i]->mapDataIntoHostMemory();
-        }
-    }
 }
 
 void
diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp
index 092490d15..f7e15ffd5 100644
--- a/src/OpTensorSyncLocal.cpp
+++ b/src/OpTensorSyncLocal.cpp
@@ -48,11 +48,6 @@ OpTensorSyncLocal::postEval(const vk::CommandBuffer& commandBuffer)
     KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
 
     KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
-    for (size_t i = 0; i < this->mTensors.size(); i++) {
-        if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
-            this->mTensors[i]->mapDataFromHostMemory();
-        }
-    }
 }
 
 }
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index d3225987e..4d7dcd2db 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -50,7 +50,9 @@ Tensor::rebuild(void* data,
     }
 
     this->allocateMemoryCreateGPUResources();
-    this->rawMapDataIntoHostMemory(data);
+    this->rawMapData();
+
+    memcpy(this->mRawData, data, this->memorySize());
 }
 
 Tensor::TensorTypes
@@ -177,18 +179,6 @@ Tensor::constructDescriptorBufferInfo()
                                     bufferSize);
 }
 
-void
-Tensor::mapDataFromHostMemory()
-{
-    KP_LOG_DEBUG("Kompute Tensor mapDataFromHostMemory - SKIPPING");
-}
-
-void
-Tensor::mapDataIntoHostMemory()
-{
-    KP_LOG_DEBUG("Kompute Tensor mapDataIntoHostMemory - SKIPPING");
-}
-
 vk::BufferUsageFlags
 Tensor::getPrimaryBufferUsageFlags()
 {
@@ -219,7 +209,8 @@ Tensor::getPrimaryMemoryPropertyFlags()
             return vk::MemoryPropertyFlagBits::eDeviceLocal;
             break;
         case TensorTypes::eHost:
-            return vk::MemoryPropertyFlagBits::eHostVisible;
+            return vk::MemoryPropertyFlagBits::eHostVisible |
+                vk::MemoryPropertyFlagBits::eHostCoherent;
             break;
         case TensorTypes::eStorage:
             return vk::MemoryPropertyFlagBits::eDeviceLocal;
@@ -438,34 +429,4 @@ Tensor::destroy()
     KP_LOG_DEBUG("Kompute Tensor successful destroy()");
 }
 
-template<>
-Tensor::TensorDataTypes
-TensorView<bool>::dataType() {
-    return Tensor::TensorDataTypes::eBool;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<int32_t>::dataType() {
-    return Tensor::TensorDataTypes::eInt;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<uint32_t>::dataType() {
-    return Tensor::TensorDataTypes::eUnsignedInt;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<float>::dataType() {
-    return Tensor::TensorDataTypes::eFloat;
-}
-
-template<>
-Tensor::TensorDataTypes
-TensorView<double>::dataType() {
-    return Tensor::TensorDataTypes::eDouble;
-}
-
 }
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 6af4682d6..f041d57e3 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -60,67 +60,6 @@ class Tensor
      */
     virtual ~Tensor();
 
-    /**
-     * Returns the size/magnitude of the Tensor, which will be the total number
-     * of elements across all dimensions
-     *
-     * @return Unsigned integer representing the total number of elements
-     */
-    // TODO: move to cpp
-    virtual uint32_t size() {
-        return this->mSize;
-    }
-
-    // TODO: move to cpp
-    virtual uint32_t dataTypeMemorySize() {
-        return this->mDataTypeMemorySize;
-    }
-
-    // TODO: move to cpp
-    virtual uint32_t memorySize() {
-        return this->mSize * this->mDataTypeMemorySize;
-    }
-
-    /**
-     * Retrieve the underlying data type of the Tensor
-     *
-     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
-     */
-    virtual TensorDataTypes dataType() {
-        return this->mDataType;
-    }
-
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    virtual void mapDataFromHostMemory();
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    virtual void mapDataIntoHostMemory();
-
-    // TODO: Decide whether this is one we prefer to have also overriden in the underlying tensorView
-    // TODO: move to cpp
-    virtual void getRawData(void* data) {
-        this->rawMapDataFromHostMemory(data);
-    }
-
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    virtual void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) {
-        if (elementTotalCount * elementMemorySize != this->memorySize()) {
-            throw std::runtime_error(
-              "Kompute Tensor Cannot set data of different sizes");
-        }
-        this->mSize = elementTotalCount;
-        this->mDataTypeMemorySize = elementMemorySize;
-        this->rawMapDataIntoHostMemory(data);
-    }
-
     /**
      * Function to trigger reinitialisation of the tensor buffer and memory with
      * new data as well as new potential device type.
@@ -151,7 +90,6 @@ class Tensor
      */
     TensorTypes tensorType();
 
-
     /**
      * Records a copy from the memory of the tensor provided to the current
      * thensor. This is intended to pass memory into a processing, to perform
@@ -215,8 +153,53 @@ class Tensor
      */
     vk::DescriptorBufferInfo constructDescriptorBufferInfo();
 
-  protected:
-    void rawMapDataFromHostMemory(void* data) {
+    /**
+     * Returns the size/magnitude of the Tensor, which will be the total number
+     * of elements across all dimensions
+     *
+     * @return Unsigned integer representing the total number of elements
+     */
+    // TODO: move to cpp
+    uint32_t size() {
+        return this->mSize;
+    }
+
+    // TODO: move to cpp
+    uint32_t dataTypeMemorySize() {
+        return this->mDataTypeMemorySize;
+    }
+
+    // TODO: move to cpp
+    uint32_t memorySize() {
+        return this->mSize * this->mDataTypeMemorySize;
+    }
+
+    /**
+     * Retrieve the underlying data type of the Tensor
+     *
+     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
+     */
+    TensorDataTypes dataType() {
+        return this->mDataType;
+    }
+
+    // TODO: move to cpp
+    const void* getRawData() {
+        return this->mRawData;
+    }
+
+    /**
+     * Sets / resets the vector data of the tensor. This function does not
+     * perform any copies into GPU memory and is only performed on the host.
+     */
+    void setRawData(const void* data) 
+    {
+        // Copy data 
+        memcpy(this->mRawData, data, this->memorySize());
+    }
+
+  private:
+    void rawMapData() {
 
         KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
 
@@ -233,39 +216,12 @@ class Tensor
         }
 
         vk::DeviceSize bufferSize = this->memorySize();
-        void* mapped = this->mDevice->mapMemory(
+        // Given we request coherent host memory we don't need to invalidate / flush
+        this->mRawData = this->mDevice->mapMemory(
           *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
         vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
-        memcpy(data, mapped, bufferSize);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
     }
 
-    void rawMapDataIntoHostMemory(void* data) {
-        KP_LOG_DEBUG("Kompute Tensor mapping data into host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-
-        void* mapped = this->mDevice->mapMemory(
-          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-        memcpy(mapped, data, bufferSize);
-        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
-    }
-  private:
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;
@@ -285,6 +241,7 @@ class Tensor
     TensorDataTypes mDataType;
     uint32_t mSize;
     uint32_t mDataTypeMemorySize;
+    void* mRawData;
 
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
@@ -316,91 +273,40 @@ class TensorView: public Tensor
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
            const TensorTypes& tensorType = TensorTypes::eDevice)
-        : Tensor(physicalDevice, device, (void*)data.data(), data.size(), sizeof(T), this->dataType())
+        : Tensor(physicalDevice,
+                 device,
+                 (void*)data.data(),
+                 data.size(),
+                 sizeof(T),
+                 this->dataType())
     {
         KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
-        this->mData = data;
     }
 
     ~TensorView() {
         KP_LOG_DEBUG("Kompute TensorView destructor");
     }
 
-    void rebuild(const std::vector<T>& data,
-            TensorTypes tensorType = TensorTypes::eDevice) {
-        KP_LOG_DEBUG("Kompute TensorView creating with data size {}", data.size());
-        this->mData = data;
-        Tensor::rebuild(data.data(), data.size(), sizeof(T));
-    }
-
-    std::vector<T>& data() {
-        return this->mData;
+    std::vector<T> data() {
+        return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() };
     }
 
     T& operator[](int index) {
-        return this->mData[index];
+        return ((T*)this->mRawData)[index];
     }
 
     void setData(const std::vector<T>& data) {
+
         KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
 
-        if (data.size() != this->mData.size()) {
+        if (data.size() != this->mSize) {
             throw std::runtime_error(
               "Kompute TensorView Cannot set data of different sizes");
         }
 
-        this->mData = data;
-
         Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
     }
 
-    void setRawData(void* data, uint32_t elementTotalCount, uint32_t elementMemorySize) override 
-    {
-        KP_LOG_DEBUG("Kompute TensorView setRawData with data size {}", elementTotalCount);
-
-        assert(elementMemorySize == sizeof(T));
-
-        this->mData = { (T*)data, ((T*)data) + elementTotalCount };
-        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
-    }
-
-    TensorDataTypes dataType() override;
-
-    uint32_t size() override {
-        KP_LOG_DEBUG("Kompute TensorView retrieving size: {}", this->mData.size());
-
-        return this->mData.size();
-    }
-
-    uint32_t memorySize() override {
-        KP_LOG_DEBUG("Kompute TensorView retrieving memory size: {}", this->mData.size() * sizeof(T));
-
-        return this->mData.size() * sizeof(T);
-    }
-
-    /**
-     * Maps data from the Host Visible GPU memory into the data vector. It
-     * requires the Tensor to be of staging type for it to work.
-     */
-    void mapDataFromHostMemory() override {
-        KP_LOG_DEBUG("Kompute TensorView mapDataFromHostMemory copying data");
-
-        this->rawMapDataFromHostMemory(this->mData.data());
-    }
-    /**
-     * Maps data from the data vector into the Host Visible GPU memory. It
-     * requires the tensor to be of staging type for it to work.
-     */
-    void mapDataIntoHostMemory() override {
-        KP_LOG_DEBUG("Kompute TensorView mapDataIntoHostMemory copying data");
-
-        this->rawMapDataIntoHostMemory(this->mData.data());
-    }
-
-  private:
-    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<T> mData;
-
 };
 
 } // End namespace kp

From f02b9d6915c9e05b71b426c5f609178c5946aa04 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 08:00:19 +0000
Subject: [PATCH 07/16] Working implementation with tests

---
 examples/array_multiplication/src/Main.cpp |  8 +--
 examples/logistic_regression/src/Main.cpp  | 21 ++++---
 single_include/kompute/Kompute.hpp         | 72 +++++++++++++++-------
 src/OpTensorCopy.cpp                       |  2 +-
 src/Tensor.cpp                             | 30 +++++++++
 src/include/kompute/Manager.hpp            | 11 +++-
 src/include/kompute/Tensor.hpp             | 61 +++++++++++-------
 test/TestAsyncOperations.cpp               | 12 ++--
 test/TestDestroy.cpp                       | 18 +++---
 test/TestLogisticRegression.cpp            | 38 ++++++------
 test/TestManager.cpp                       | 24 ++++----
 test/TestMultipleAlgoExecutions.cpp        | 24 ++++----
 test/TestOpShadersFromStringAndFile.cpp    | 24 ++++----
 test/TestOpTensorCopy.cpp                  | 56 ++++++++---------
 test/TestOpTensorCreate.cpp                | 14 ++---
 test/TestOpTensorSync.cpp                  | 16 ++---
 test/TestPushConstant.cpp                  | 10 +--
 test/TestSequence.cpp                      | 10 +--
 test/TestSpecializationConstant.cpp        |  8 +--
 test/TestTensor.cpp                        |  4 +-
 test/TestWorkgroup.cpp                     | 50 +++++++--------
 21 files changed, 297 insertions(+), 216 deletions(-)

diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp
index 812a5039f..95e0781ad 100755
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@@ -14,9 +14,9 @@ int main()
 
     kp::Manager mgr;
 
-    auto tensorInA = mgr.tensor<float>({ 2.0, 4.0, 6.0 });
-    auto tensorInB = mgr.tensor<float>({ 0.0, 1.0, 2.0 });
-    auto tensorOut = mgr.tensor<float>({ 0.0, 0.0, 0.0 });
+    auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 });
+    auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 });
+    auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 });
 
     std::string shader(R"(
         // The version to use 
@@ -49,7 +49,7 @@ int main()
 
     // prints "Output {  0  4  12  }"
     std::cout<< "Output: {  ";
-    for (const float& elem : tensorOut->data()) {
+    for (const float& elem : tensorOut->vector()) {
       std::cout << elem << "  ";
     }
     std::cout << "}" << std::endl;
diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp
index 3b6ec11e1..c7cc827ba 100755
--- a/examples/logistic_regression/src/Main.cpp
+++ b/examples/logistic_regression/src/Main.cpp
@@ -17,19 +17,19 @@ int main()
 
     kp::Manager mgr;
 
-    auto xI = mgr.tensor<float>({ 0, 1, 1, 1, 1 });
-    auto xJ = mgr.tensor<float>({ 0, 0, 0, 1, 1 });
+    auto xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+    auto xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-    auto y = mgr.tensor<float>({ 0, 0, 0, 1, 1 });
+    auto y = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-    auto wIn = mgr.tensor<float>({ 0.001, 0.001 });
-    auto wOutI = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
-    auto wOutJ = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
+    auto wIn = mgr.tensor({ 0.001, 0.001 });
+    auto wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+    auto wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-    auto bIn = mgr.tensor<float>({ 0 });
-    auto bOut = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
+    auto bIn = mgr.tensor({ 0 });
+    auto bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-    auto lOut = mgr.tensor<float>({ 0, 0, 0, 0, 0 });
+    auto lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
                                                         wIn, wOutI, wOutJ,
@@ -40,7 +40,8 @@ int main()
                 (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
                     + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
-    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
+    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
+            params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
 
     mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 496e6f198..df9549aab 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -974,11 +974,21 @@ class Tensor
         return this->mDataType;
     }
 
-    // TODO: move to cpp
-    const void* getRawData() {
+    void* rawData() {
         return this->mRawData;
     }
 
+    // TODO: move to cpp
+    template <typename T>
+    T* data() {
+        return static_cast<T*>(this->mRawData); // mRawData is void*: no implicit void* -> T* conversion in C++
+    }
+
+    template <typename T>
+    std::vector<T> vector() {
+        return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
+    }
+
     /**
      * Sets / resets the vector data of the tensor. This function does not
      * perform any copies into GPU memory and is only performed on the host.
@@ -989,6 +999,14 @@ class Tensor
         memcpy(this->mRawData, data, this->memorySize());
     }
 
+  protected:
+    // -------------- ALWAYS OWNED RESOURCES
+    TensorTypes mTensorType;
+    TensorDataTypes mDataType;
+    uint32_t mSize;
+    uint32_t mDataTypeMemorySize;
+    void* mRawData;
+
   private:
     void rawMapData() {
 
@@ -1027,13 +1045,6 @@ class Tensor
     std::shared_ptr<vk::DeviceMemory> mStagingMemory;
     bool mFreeStagingMemory = false;
 
-    // -------------- ALWAYS OWNED RESOURCES
-    TensorTypes mTensorType;
-    TensorDataTypes mDataType;
-    uint32_t mSize;
-    uint32_t mDataTypeMemorySize;
-    void* mRawData;
-
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
                       vk::BufferUsageFlags bufferUsageFlags);
@@ -1057,10 +1068,11 @@ class Tensor
 
 // TODO: Limit T to be only float, bool, double, etc
 template <typename T>
-class TensorView: public Tensor
+class TensorT: public Tensor
 {
+
   public:
-    TensorView(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+    TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
            const TensorTypes& tensorType = TensorTypes::eDevice)
@@ -1069,35 +1081,42 @@ class TensorView: public Tensor
                  (void*)data.data(),
                  data.size(),
                  sizeof(T),
-                 this->dataType())
+                 this->dataType(),
+                 tensorType)
     {
-        KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
+        KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
     }
 
-    ~TensorView() {
-        KP_LOG_DEBUG("Kompute TensorView destructor");
+    ~TensorT() {
+        KP_LOG_DEBUG("Kompute TensorT destructor");
     }
 
-    std::vector<T> data() {
-        return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() };
+    T* data() {
+        return (T*)this->mRawData;
+    }
+
+    std::vector<T> vector() {
+        return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
     }
 
     T& operator[](int index) {
-        return ((T*)this->mRawData)[index];
+        return *(((T*)this->mRawData) + index);
     }
 
     void setData(const std::vector<T>& data) {
 
-        KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
+        KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
 
         if (data.size() != this->mSize) {
             throw std::runtime_error(
-              "Kompute TensorView Cannot set data of different sizes");
+              "Kompute TensorT Cannot set data of different sizes");
         }
 
-        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
+        Tensor::setRawData(data.data());
     }
 
+    TensorDataTypes dataType();
+
 };
 
 } // End namespace kp
@@ -1969,13 +1988,13 @@ class Manager
      * @returns Shared pointer with initialised tensor
      */
     template <typename T>
-    std::shared_ptr<TensorView<T>> tensor(
+    std::shared_ptr<TensorT<T>> tensorT(
       const std::vector<T>& data,
       Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
     {
         KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
 
-        std::shared_ptr<TensorView<T>> tensor{ new kp::TensorView<T>(
+        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
           this->mPhysicalDevice, this->mDevice, data, tensorType) };
 
         if (this->mManageResources) {
@@ -1985,6 +2004,13 @@ class Manager
         return tensor;
     }
 
+    std::shared_ptr<TensorT<float>> tensor(
+      const std::vector<float>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        return this->tensorT<float>(data, tensorType);
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index ce53455a3..c93830902 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -61,7 +61,7 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
     uint32_t size = this->mTensors[0]->size();
     uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
     uint32_t memSize = size * dataTypeMemSize;
-    const void* data = this->mTensors[0]->getRawData();
+    void* data = this->mTensors[0]->rawData();
 
     // Copy the data from the first tensor into all the tensors
     for (size_t i = 1; i < this->mTensors.size(); i++) {
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 4d7dcd2db..335e48959 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -429,4 +429,34 @@ Tensor::destroy()
     KP_LOG_DEBUG("Kompute Tensor successful destroy()");
 }
 
+template<>
+Tensor::TensorDataTypes
+TensorT<bool>::dataType() {
+    return Tensor::TensorDataTypes::eBool;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorT<int32_t>::dataType() {
+    return Tensor::TensorDataTypes::eInt;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorT<uint32_t>::dataType() {
+    return Tensor::TensorDataTypes::eUnsignedInt;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorT<float>::dataType() {
+    return Tensor::TensorDataTypes::eFloat;
+}
+
+template<>
+Tensor::TensorDataTypes
+TensorT<double>::dataType() {
+    return Tensor::TensorDataTypes::eDouble;
+}
+
 }
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index d27bccacc..c39f5d6b5 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -75,13 +75,13 @@ class Manager
      * @returns Shared pointer with initialised tensor
      */
     template <typename T>
-    std::shared_ptr<TensorView<T>> tensor(
+    std::shared_ptr<TensorT<T>> tensorT(
       const std::vector<T>& data,
       Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
     {
         KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
 
-        std::shared_ptr<TensorView<T>> tensor{ new kp::TensorView<T>(
+        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
           this->mPhysicalDevice, this->mDevice, data, tensorType) };
 
         if (this->mManageResources) {
@@ -91,6 +91,13 @@ class Manager
         return tensor;
     }
 
+    std::shared_ptr<TensorT<float>> tensor(
+      const std::vector<float>& data,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        return this->tensorT<float>(data, tensorType);
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index f041d57e3..898a2df08 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -183,11 +183,21 @@ class Tensor
         return this->mDataType;
     }
 
-    // TODO: move to cpp
-    const void* getRawData() {
+    void* rawData() {
         return this->mRawData;
     }
 
+    // TODO: move to cpp
+    template <typename T>
+    T* data() {
+        return static_cast<T*>(this->mRawData); // mRawData is void*: no implicit void* -> T* conversion in C++
+    }
+
+    template <typename T>
+    std::vector<T> vector() {
+        return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
+    }
+
     /**
      * Sets / resets the vector data of the tensor. This function does not
      * perform any copies into GPU memory and is only performed on the host.
@@ -198,6 +208,14 @@ class Tensor
         memcpy(this->mRawData, data, this->memorySize());
     }
 
+  protected:
+    // -------------- ALWAYS OWNED RESOURCES
+    TensorTypes mTensorType;
+    TensorDataTypes mDataType;
+    uint32_t mSize;
+    uint32_t mDataTypeMemorySize;
+    void* mRawData;
+
   private:
     void rawMapData() {
 
@@ -236,13 +254,6 @@ class Tensor
     std::shared_ptr<vk::DeviceMemory> mStagingMemory;
     bool mFreeStagingMemory = false;
 
-    // -------------- ALWAYS OWNED RESOURCES
-    TensorTypes mTensorType;
-    TensorDataTypes mDataType;
-    uint32_t mSize;
-    uint32_t mDataTypeMemorySize;
-    void* mRawData;
-
     void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
     void createBuffer(std::shared_ptr<vk::Buffer> buffer,
                       vk::BufferUsageFlags bufferUsageFlags);
@@ -266,10 +277,11 @@ class Tensor
 
 // TODO: Limit T to be only float, bool, double, etc
 template <typename T>
-class TensorView: public Tensor
+class TensorT: public Tensor
 {
+
   public:
-    TensorView(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+    TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
            const std::vector<T>& data,
            const TensorTypes& tensorType = TensorTypes::eDevice)
@@ -278,35 +290,42 @@ class TensorView: public Tensor
                  (void*)data.data(),
                  data.size(),
                  sizeof(T),
-                 this->dataType())
+                 this->dataType(),
+                 tensorType)
     {
-        KP_LOG_DEBUG("Kompute TensorView constructor with data size {}", data.size());
+        KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
     }
 
-    ~TensorView() {
-        KP_LOG_DEBUG("Kompute TensorView destructor");
+    ~TensorT() {
+        KP_LOG_DEBUG("Kompute TensorT destructor");
     }
 
-    std::vector<T> data() {
-        return { (T*)this->getRawData(), ((T*)this->getRawData()) + this->size() };
+    T* data() {
+        return (T*)this->mRawData;
+    }
+
+    std::vector<T> vector() {
+        return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
     }
 
     T& operator[](int index) {
-        return ((T*)this->mRawData)[index];
+        return *(((T*)this->mRawData) + index);
     }
 
     void setData(const std::vector<T>& data) {
 
-        KP_LOG_DEBUG("Kompute TensorView setting data with data size {}", data.size());
+        KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
 
         if (data.size() != this->mSize) {
             throw std::runtime_error(
-              "Kompute TensorView Cannot set data of different sizes");
+              "Kompute TensorT Cannot set data of different sizes");
         }
 
-        Tensor::setRawData(this->mData.data(), this->mData.size(), sizeof(T));
+        Tensor::setRawData(data.data());
     }
 
+    TensorDataTypes dataType();
+
 };
 
 } // End namespace kp
diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp
index 2f8c7d819..7feaaa30e 100644
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@@ -73,7 +73,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
     sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);
 
     for (uint32_t i = 0; i < numParallel; i++) {
-        EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
+        EXPECT_EQ(inputsSyncB[i]->vector<float>(), resultSync);
     }
 
     kp::Manager mgrAsync(0, { 0, 2 });
@@ -111,7 +111,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
     sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });
 
     for (uint32_t i = 0; i < numParallel; i++) {
-        EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
+        EXPECT_EQ((inputsAsyncB[i]->vector<float>()), resultAsync);
     }
 
     // The speedup should be at least 40%
@@ -152,8 +152,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
 
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(data);
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(data);
 
     std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
     std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();
@@ -172,6 +172,6 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
     sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
     sq1->evalAwait();
 
-    EXPECT_EQ(tensorA->data(), resultAsync);
-    EXPECT_EQ(tensorB->data(), resultAsync);
+    EXPECT_EQ(tensorA->vector(), resultAsync);
+    EXPECT_EQ(tensorB->vector(), resultAsync);
 }
diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
index 0b948d64f..0ccfdb0f8 100644
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@@ -5,9 +5,9 @@
 
 TEST(TestDestroy, TestDestroyTensorSingle)
 {
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
 
-    std::string shader(R"(
+        std::string shader(R"(
       #version 450
       layout (local_size_x = 1) in;
       layout(set = 0, binding = 0) buffer a { float pa[]; };
@@ -39,13 +39,13 @@ TEST(TestDestroy, TestDestroyTensorSingle)
         }
         EXPECT_FALSE(tensorA->isInit());
     }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
 }
 
 TEST(TestDestroy, TestDestroyTensorVector)
 {
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
-    std::shared_ptr<kp::Tensor> tensorB = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
 
     std::string shader(R"(
       #version 450
@@ -84,13 +84,13 @@ TEST(TestDestroy, TestDestroyTensorVector)
             EXPECT_FALSE(tensorB->isInit());
         }
     }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
-    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
+    EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestDestroy, TestDestroySequenceSingle)
 {
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
 
     std::string shader(R"(
       #version 450
@@ -123,5 +123,5 @@ TEST(TestDestroy, TestDestroySequenceSingle)
             EXPECT_FALSE(sq->isInit());
         }
     }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
 }
diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp
index 980273246..a4402637f 100644
--- a/test/TestLogisticRegression.cpp
+++ b/test/TestLogisticRegression.cpp
@@ -14,19 +14,19 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
     {
         kp::Manager mgr;
 
-        std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
-        std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-        std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-        std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
-        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
-        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> wIn = mgr.tensor({ 0.001, 0.001 });
+        std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-        std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
-        std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> bIn = mgr.tensor({ 0 });
+        std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-        std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
         std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
                                                             wIn, wOutI, wOutJ,
@@ -88,21 +88,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
     {
         kp::Manager mgr;
 
-        std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
-        std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-        std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
+        std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-        std::shared_ptr<kp::Tensor> wIn =
+        std::shared_ptr<kp::TensorT<float>> wIn =
           mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
-        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
-        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-        std::shared_ptr<kp::Tensor> bIn =
+        std::shared_ptr<kp::TensorT<float>> bIn =
           mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
-        std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-        std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
         std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
                                                             wIn, wOutI, wOutJ,
@@ -136,8 +136,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
                 wIn->data()[1] -= learningRate * wOutJ->data()[j];
                 bIn->data()[0] -= learningRate * bOut->data()[j];
             }
-            wIn->mapDataIntoHostMemory();
-            bIn->mapDataIntoHostMemory();
         }
 
         // Based on the inputs the outputs should be at least:
diff --git a/test/TestManager.cpp b/test/TestManager.cpp
index ce055ff8c..f759208aa 100644
--- a/test/TestManager.cpp
+++ b/test/TestManager.cpp
@@ -7,9 +7,9 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
-    std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
-    std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
+    std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
+    std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
                                                         tensorRHS,
@@ -20,16 +20,16 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
       ->eval<kp::OpMult>(params, mgr.algorithm())
       ->eval<kp::OpTensorSyncLocal>(params);
 
-    EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
+    EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
 }
 
 TEST(TestManager, EndToEndOpMultSeqFlow)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
-    std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
-    std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
+    std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
+    std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
                                                         tensorRHS,
@@ -41,16 +41,16 @@ TEST(TestManager, EndToEndOpMultSeqFlow)
       ->record<kp::OpTensorSyncLocal>(params)
       ->eval();
 
-    EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
+    EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
 }
 
 TEST(TestManager, TestMultipleSequences)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorLHS = mgr.tensor({ 0, 1, 2 });
-    std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
-    std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorLHS = mgr.tensor({ 0, 1, 2 });
+    std::shared_ptr<kp::TensorT<float>> tensorRHS = mgr.tensor({ 2, 4, 6 });
+    std::shared_ptr<kp::TensorT<float>> tensorOutput = mgr.tensor({ 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
                                                         tensorRHS,
@@ -60,5 +60,5 @@ TEST(TestManager, TestMultipleSequences)
     mgr.sequence()->eval<kp::OpMult>(params, mgr.algorithm());
     mgr.sequence()->eval<kp::OpTensorSyncLocal>(params);
 
-    EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
+    EXPECT_EQ(tensorOutput->vector(), std::vector<float>({ 0, 4, 12 }));
 }
diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index 63dd5f7fe..b934f7e83 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -64,8 +64,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
 
     sq->evalAwait();
 
-    EXPECT_EQ(tensorOutA->data(), std::vector<float>({ 4, 8, 12 }));
-    EXPECT_EQ(tensorOutB->data(), std::vector<float>({ 10, 10, 10 }));
+    EXPECT_EQ(tensorOutA->vector(), std::vector<float>({ 4, 8, 12 }));
+    EXPECT_EQ(tensorOutB->vector(), std::vector<float>({ 10, 10, 10 }));
 }
 
 TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
@@ -73,7 +73,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
 
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
 
     std::string shader(R"(
       #version 450
@@ -96,14 +96,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
           ->eval();
     }
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
 
     std::string shader(R"(
       #version 450
@@ -131,7 +131,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
 
     mgr.sequence()->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestMultipleAlgoExecutions, MultipleSequences)
@@ -139,7 +139,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
 
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
 
     std::string shader(R"(
       #version 450
@@ -167,14 +167,14 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
 
     sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
 
     std::string shader(R"(
       #version 450
@@ -198,12 +198,12 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
 
     sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
 {
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
 
     {
         std::shared_ptr<kp::Sequence> sq = nullptr;
@@ -236,5 +236,5 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
         }
     }
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp
index e766c8efb..a1f8eda99 100644
--- a/test/TestOpShadersFromStringAndFile.cpp
+++ b/test/TestOpShadersFromStringAndFile.cpp
@@ -9,8 +9,8 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
 
     std::string shader(R"(
         #version 450
@@ -36,16 +36,16 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
       ->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
       ->eval<kp::OpTensorSyncLocal>(params);
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
-    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
+    EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
 }
 
 TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
 {
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 3, 4, 5 });
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
 
     std::vector<uint32_t> spirv = std::vector<uint32_t>(
       (uint32_t*)
@@ -62,8 +62,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
       ->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
       ->eval<kp::OpTensorSyncLocal>(params);
 
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
-    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
+    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
+    EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
 }
 
 // TODO: Add support to read from file for shader
@@ -71,8 +71,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
 //{
 //    kp::Manager mgr;
 //
-//    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
-//    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
+//    std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
+//    std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
 //    mgr.rebuild({ tensorA, tensorB });
 //
 //    mgr.evalOpDefault<kp::OpAlgoCreate>(
@@ -81,6 +81,6 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
 //
 //    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
 //
-//    EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
-//    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
+//    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 0, 1, 2 }));
+//    EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 4, 5 }));
 //}
diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp
index 85e0b545b..6978eeeea 100644
--- a/test/TestOpTensorCopy.cpp
+++ b/test/TestOpTensorCopy.cpp
@@ -11,8 +11,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
     std::vector<float> testVecA{ 1, 2, 3 };
     std::vector<float> testVecB{ 0, 0, 0 };
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
 
     EXPECT_TRUE(tensorA->isInit());
     EXPECT_TRUE(tensorB->isInit());
@@ -22,8 +22,8 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
       ->eval<kp::OpTensorCopy>({ tensorA, tensorB })
       ->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
 
-    // Making sure the GPU holds the same data
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    // Making sure the GPU holds the same data
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 }
 
 TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
@@ -35,9 +35,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
     std::vector<float> testVecB{ 0, 0, 0 };
     std::vector<float> testVecC{ 0, 0, 0 };
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
-    std::shared_ptr<kp::Tensor> tensorC = mgr.tensor(testVecC);
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
+    std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor(testVecC);
 
     EXPECT_TRUE(tensorA->isInit());
     EXPECT_TRUE(tensorB->isInit());
@@ -47,14 +47,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
       ->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC })
       ->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
 
-    EXPECT_EQ(tensorA->data(), tensorB->data());
-    EXPECT_EQ(tensorA->data(), tensorC->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
+    EXPECT_EQ(tensorA->vector(), tensorC->vector());
 
-    // Making sure the GPU holds the same data
+    // Making sure the GPU holds the same data
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
 
-    EXPECT_EQ(tensorA->data(), tensorB->data());
-    EXPECT_EQ(tensorA->data(), tensorC->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
+    EXPECT_EQ(tensorA->vector(), tensorC->vector());
 }
 
 TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
@@ -65,8 +65,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
     std::vector<float> testVecA{ 3, 4, 5 };
     std::vector<float> testVecB{ 0, 0, 0 };
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
-    std::shared_ptr<kp::Tensor> tensorB =
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::TensorT<float>> tensorB =
       mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
 
     //  Only calling sync on device type tensor
@@ -77,11 +77,11 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
 
     mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
 
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 
-    // Making sure the GPU holds the same data
+    // Making sure the GPU holds the same data
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 }
 
 TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
@@ -92,9 +92,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
     std::vector<float> testVecA{ 4, 5, 6 };
     std::vector<float> testVecB{ 0, 0, 0 };
 
-    std::shared_ptr<kp::Tensor> tensorA =
+    std::shared_ptr<kp::TensorT<float>> tensorA =
       mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
 
     //  Only calling sync on device type tensor
     mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB });
@@ -104,11 +104,11 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
 
     mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB });
 
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 
-    // Making sure the GPU holds the same data
+    // Making sure the GPU holds the same data
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 }
 
 TEST(TestOpTensorCopy, CopyHostToHostTensor)
@@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
     std::vector<float> testVecA{ 5, 6, 7 };
     std::vector<float> testVecB{ 0, 0, 0 };
 
-    std::shared_ptr<kp::Tensor> tensorA =
+    std::shared_ptr<kp::TensorT<float>> tensorA =
       mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
-    std::shared_ptr<kp::Tensor> tensorB =
+    std::shared_ptr<kp::TensorT<float>> tensorB =
       mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
 
     EXPECT_TRUE(tensorA->isInit());
@@ -131,11 +131,11 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
       ->eval<kp::OpTensorSyncDevice>({ tensorA })
       ->eval<kp::OpTensorCopy>({ tensorA, tensorB });
 
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 
-    // Making sure the GPU holds the same data
+    // Making sure the GPU holds the same data
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
-    EXPECT_EQ(tensorA->data(), tensorB->data());
+    EXPECT_EQ(tensorA->vector(), tensorB->vector());
 }
 
 TEST(TestOpTensorCopy, SingleTensorShouldFail)
@@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
 
     std::vector<float> testVecA{ 6, 7, 8 };
 
-    std::shared_ptr<kp::Tensor> tensorA =
+    std::shared_ptr<kp::TensorT<float>> tensorA =
       mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
 
     EXPECT_TRUE(tensorA->isInit());
diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp
index 14153427e..7ba1be615 100644
--- a/test/TestOpTensorCreate.cpp
+++ b/test/TestOpTensorCreate.cpp
@@ -6,7 +6,7 @@
 TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
 {
     std::vector<float> testVecA{ 9, 8, 7 };
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
 
     {
         kp::Manager mgr;
@@ -15,7 +15,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
 
         EXPECT_TRUE(tensorA->isInit());
 
-        EXPECT_EQ(tensorA->data(), testVecA);
+        EXPECT_EQ(tensorA->vector(), testVecA);
     }
 
     EXPECT_FALSE(tensorA->isInit());
@@ -29,11 +29,11 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
 
     kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
 
-    EXPECT_EQ(tensorA->data(), testVecA);
-    EXPECT_EQ(tensorB->data(), testVecB);
+    EXPECT_EQ(tensorA->vector(), testVecA);
+    EXPECT_EQ(tensorB->vector(), testVecB);
 
     tensorA->destroy();
     tensorB->destroy();
@@ -49,7 +49,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
     kp::Manager mgr;
 
     try {
-        std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
+        std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
     } catch (const std::runtime_error& err) {
         // check exception
         ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp
index 55e02ad13..02271c618 100644
--- a/test/TestOpTensorSync.cpp
+++ b/test/TestOpTensorSync.cpp
@@ -11,7 +11,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
     std::vector<float> testVecPreA{ 0, 0, 0 };
     std::vector<float> testVecPostA{ 9, 8, 7 };
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecPreA);
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecPreA);
 
     EXPECT_TRUE(tensorA->isInit());
 
@@ -21,7 +21,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)
 
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA });
 
-    EXPECT_EQ(tensorA->data(), testVecPostA);
+    EXPECT_EQ(tensorA->vector(), testVecPostA);
 }
 
 TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
@@ -31,9 +31,9 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
 
     std::vector<float> testVec{ 9, 8, 7 };
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
-    std::shared_ptr<kp::Tensor> tensorC = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
+    std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor({ 0, 0, 0 });
 
     EXPECT_TRUE(tensorA->isInit());
     EXPECT_TRUE(tensorB->isInit());
@@ -47,7 +47,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
 
     mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC });
 
-    EXPECT_EQ(tensorA->data(), testVec);
-    EXPECT_EQ(tensorB->data(), testVec);
-    EXPECT_EQ(tensorC->data(), testVec);
+    EXPECT_EQ(tensorA->vector(), testVec);
+    EXPECT_EQ(tensorB->vector(), testVec);
+    EXPECT_EQ(tensorC->vector(), testVec);
 }
diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp
index b37fe4d72..9599596ed 100644
--- a/test/TestPushConstant.cpp
+++ b/test/TestPushConstant.cpp
@@ -29,7 +29,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
         {
             kp::Manager mgr;
 
-            std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
 
             std::shared_ptr<kp::Algorithm> algo =
               mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
@@ -42,7 +42,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
             sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
             sq->eval<kp::OpTensorSyncLocal>({ tensor });
 
-            EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
+            EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
         }
     }
 }
@@ -72,7 +72,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
         {
             kp::Manager mgr;
 
-            std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
 
             std::shared_ptr<kp::Algorithm> algo =
               mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
@@ -85,7 +85,7 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
             sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
             sq->eval<kp::OpTensorSyncLocal>({ tensor });
 
-            EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
+            EXPECT_EQ(tensor->vector(), kp::Constants({ 0.4, 0.4, 0.4 }));
         }
     }
 }
@@ -115,7 +115,7 @@ TEST(TestPushConstants, TestConstantsWrongSize)
         {
             kp::Manager mgr;
 
-            std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
 
             std::shared_ptr<kp::Algorithm> algo =
               mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp
index 7d70a477b..090a6317b 100644
--- a/test/TestSequence.cpp
+++ b/test/TestSequence.cpp
@@ -60,9 +60,9 @@ TEST(TestSequence, RerecordSequence)
 
     std::shared_ptr<kp::Sequence> sq = mgr.sequence();
 
-    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({1, 2, 3});
-    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({2, 2, 2});
-    std::shared_ptr<kp::Tensor> tensorOut = mgr.tensor({0, 0, 0});
+    std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({1, 2, 3});
+    std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({2, 2, 2});
+    std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({0, 0, 0});
 
     sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
 
@@ -90,7 +90,7 @@ TEST(TestSequence, RerecordSequence)
 
     sq->eval();
 
-    EXPECT_EQ(tensorOut->data(), std::vector<float>({2, 4, 6}));
+    EXPECT_EQ(tensorOut->vector(), std::vector<float>({2, 4, 6}));
 
     algo->rebuild({tensorOut, tensorA, tensorB}, spirv);
 
@@ -98,7 +98,7 @@ TEST(TestSequence, RerecordSequence)
     sq->rerecord();
     sq->eval();
 
-    EXPECT_EQ(tensorB->data(), std::vector<float>({2, 8, 18}));
+    EXPECT_EQ(tensorB->vector(), std::vector<float>({2, 8, 18}));
 }
 
 
diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index 2c6e284d2..fe40fb5ea 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -25,8 +25,8 @@ TEST(TestSpecializationConstants, TestTwoConstants)
         {
             kp::Manager mgr;
 
-            std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
-            std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
+            std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
 
             std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
                                                                 tensorB };
@@ -42,8 +42,8 @@ TEST(TestSpecializationConstants, TestTwoConstants)
                    ->record<kp::OpTensorSyncLocal>(params)
                    ->eval();
 
-            EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
-            EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 5, 5, 5 }));
+            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 0.3, 0.3, 0.3 }));
         }
     }
 }
diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp
index d33367722..c267024db 100644
--- a/test/TestTensor.cpp
+++ b/test/TestTensor.cpp
@@ -7,7 +7,7 @@ TEST(TestTensor, ConstructorData)
 {
     kp::Manager mgr;
     std::vector<float> vec{ 0, 1, 2 };
-    std::shared_ptr<kp::Tensor> tensor = mgr.tensor(vec);
+    std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor(vec);
     EXPECT_EQ(tensor->size(), vec.size());
-    EXPECT_EQ(tensor->data(), vec);
+    EXPECT_EQ(tensor->vector(), vec);
 }
diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp
index 3eb9147a1..8836840a6 100644
--- a/test/TestWorkgroup.cpp
+++ b/test/TestWorkgroup.cpp
@@ -7,8 +7,8 @@
 
 TEST(TestWorkgroup, TestSimpleWorkgroup)
 {
-    std::shared_ptr<kp::Tensor> tensorA = nullptr;
-    std::shared_ptr<kp::Tensor> tensorB = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
+    std::shared_ptr<kp::TensorT<float>> tensorB = nullptr;
     {
         std::shared_ptr<kp::Sequence> sq = nullptr;
 
@@ -39,29 +39,29 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
             sq->record<kp::OpAlgoDispatch>(algorithm);
             sq->record<kp::OpTensorSyncLocal>(params);
             sq->eval();
+
+            std::vector<float> expectedA = {
+                0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,
+                2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,
+                4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,
+                6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,
+                8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+                10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
+                12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
+                14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
+            };
+
+            std::vector<float> expectedB = {
+                0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
+                6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
+                4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
+                2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+                0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
+                6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+            };
+
+            EXPECT_EQ(tensorA->vector(), expectedA);
+            EXPECT_EQ(tensorB->vector(), expectedB);
         }
     }
-
-    std::vector<float> expectedA = {
-        0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,
-        2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,
-        4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,
-        6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,
-        8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
-        10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
-        12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
-        14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
-    };
-
-    std::vector<float> expectedB = {
-        0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
-        6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
-        4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
-        2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
-        0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
-        6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
-    };
-
-    EXPECT_EQ(tensorA->data(), expectedA);
-    EXPECT_EQ(tensorB->data(), expectedB);
 }

From 1cc369cb191db337d30d588e6f8ebeabb813e0ec Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 08:02:30 +0000
Subject: [PATCH 08/16] Mark pointer invalid after destroy tensor

---
 src/Tensor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 335e48959..aaf6ba388 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -360,6 +360,10 @@ Tensor::destroy()
 {
     KP_LOG_DEBUG("Kompute Tensor started destroy()");
 
+    this->mRawData = nullptr;
+    this->mSize = 0;
+    this->mDataTypeMemorySize = 0;
+
     if (!this->mDevice) {
         KP_LOG_WARN(
           "Kompute Tensor destructor reached with null Device pointer");

From bb64b2b37c44038c01a27bd1e985cfc4c92d00fe Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 08:10:42 +0000
Subject: [PATCH 09/16] Updated destroy and amended tests to ensure they test
 tensor in scope

---
 src/Tensor.cpp                 | 11 +++++++++--
 src/include/kompute/Tensor.hpp | 26 +++++++++++++++++++++++++-
 test/TestDestroy.cpp           | 11 +++++++----
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index aaf6ba388..8b96be163 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -50,7 +50,7 @@ Tensor::rebuild(void* data,
     }
 
     this->allocateMemoryCreateGPUResources();
-    this->rawMapData();
+    this->mapRawData();
 
     memcpy(this->mRawData, data, this->memorySize());
 }
@@ -64,7 +64,10 @@ Tensor::tensorType()
 bool
 Tensor::isInit()
 {
-    return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
+    return this->mDevice
+        && this->mPrimaryBuffer
+        && this->mPrimaryMemory
+        && this->mRawData;
 }
 
 
@@ -360,6 +363,7 @@ Tensor::destroy()
 {
     KP_LOG_DEBUG("Kompute Tensor started destroy()");
 
+    // Setting raw data to null regardless whether device is available to invalidate Tensor
     this->mRawData = nullptr;
     this->mSize = 0;
     this->mDataTypeMemorySize = 0;
@@ -370,6 +374,9 @@ Tensor::destroy()
         return;
     }
 
+    // Unmap the current memory data
+    this->unmapRawData();
+
     if (this->mFreePrimaryBuffer) {
         if (!this->mPrimaryBuffer) {
             KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 898a2df08..efc3cda18 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -217,7 +217,7 @@ class Tensor
     void* mRawData;
 
   private:
-    void rawMapData() {
+    void mapRawData() {
 
         KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
 
@@ -234,12 +234,36 @@ class Tensor
         }
 
         vk::DeviceSize bufferSize = this->memorySize();
+
         // Given we request coherent host memory we don't need to invalidate / flush
         this->mRawData = this->mDevice->mapMemory(
           *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+
         vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
     }
 
+    // Flushes and unmaps the host visible memory that backs mRawData.
+    void unmapRawData() {
+        KP_LOG_DEBUG("Kompute Tensor unmapping data from host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory;
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory;
+        }
+        // Storage tensors select no memory above; a null handle is skipped
+        // as well rather than being dereferenced below.
+        if (!hostVisibleMemory) {
+            KP_LOG_WARN("Kompute Tensor has no host visible memory to unmap");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
+
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;
diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
index 0ccfdb0f8..72eeaf72b 100644
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@@ -34,12 +34,13 @@ TEST(TestDestroy, TestDestroyTensorSingle)
               ->eval()
               ->eval<kp::OpTensorSyncLocal>(algo->getTensors());
 
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
+
             tensorA->destroy();
             EXPECT_FALSE(tensorA->isInit());
         }
         EXPECT_FALSE(tensorA->isInit());
     }
-    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
 }
 
 TEST(TestDestroy, TestDestroyTensorVector)
@@ -82,10 +83,11 @@ TEST(TestDestroy, TestDestroyTensorVector)
 
             EXPECT_FALSE(tensorA->isInit());
             EXPECT_FALSE(tensorB->isInit());
+
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
+            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
         }
     }
-    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
-    EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
 TEST(TestDestroy, TestDestroySequenceSingle)
@@ -121,7 +123,8 @@ TEST(TestDestroy, TestDestroySequenceSingle)
             sq->destroy();
 
             EXPECT_FALSE(sq->isInit());
+
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
         }
     }
-    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 1, 1, 1 }));
 }

From a2ee928f4c3503127bf773ad8348f37e6db191cd Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 10:39:30 +0000
Subject: [PATCH 10/16] Updated tests and rebased

---
 python/src/main.cpp                      | 105 +++++++++++++++--------
 python/test/test_array_multiplication.py |   6 +-
 python/test/test_logistic_regression.py  |  27 +++---
 setup.py                                 |   2 +-
 single_include/kompute/Kompute.hpp       |  49 ++++++++++-
 src/include/kompute/Manager.hpp          |  17 ++++
 src/include/kompute/Tensor.hpp           |   2 +-
 test/TestDestroy.cpp                     |   6 +-
 test/TestMultipleAlgoExecutions.cpp      |  37 --------
 9 files changed, 151 insertions(+), 100 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index d4b0f2084..eab8e5ef4 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -92,59 +92,46 @@ PYBIND11_MODULE(kp, m) {
 
     py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
         .def("data", [](kp::Tensor& self) {
-                return py::array(self.data().size(), self.data().data());
-            }, DOC(kp, Tensor, data))
-        .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; },
-                "When only an index is necessary")
-        .def("__setitem__", [](kp::Tensor &self, size_t index, float value) {
-                self.data()[index] = value; })
-        .def("set_data", [np](kp::Tensor &self, const py::array_t<float> data){
-                const py::array_t<float> flatdata = np.attr("ravel")(data);
-                const py::buffer_info info        = flatdata.request();
-                const float* ptr                  = (float*) info.ptr;
-                self.setData(std::vector<float>(ptr, ptr+flatdata.size()));
-            }, DOC(kp, Tensor, setData))
-        .def("__iter__", [](kp::Tensor &self) {
-                return py::make_iterator(self.data().begin(), self.data().end());
-            }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists
-            "Iterator to enable looping within data structure as required.")
-        .def("__contains__", [](kp::Tensor &self, float v) {
-                for (size_t i = 0; i < self.data().size(); ++i) {
-                    if (v == self.data()[i]) {
-                            return true;
-                        }
-                    }
-                return false;
-            })
-        .def("__reversed__", [](kp::Tensor &self) { 
-                size_t size = self.data().size();
-                std::vector<float> reversed(size);
-                for (size_t i = 0; i < size; i++) {
-                    reversed[size - i - 1] = self.data()[i];
+                // Non-owning container exposing the underlying pointer
+                py::str dummyDataOwner; // Explicitly request data to not be owned by np
+                switch (self.dataType()) {
+                case kp::Tensor::TensorDataTypes::eFloat:
+                    return py::array(self.size(), self.data<float>(), dummyDataOwner);
+                case kp::Tensor::TensorDataTypes::eUnsignedInt:
+                    return py::array(self.size(), self.data<uint32_t>(), dummyDataOwner);
+                case kp::Tensor::TensorDataTypes::eInt:
+                    return py::array(self.size(), self.data<int32_t>(), dummyDataOwner);
+                case kp::Tensor::TensorDataTypes::eDouble:
+                    return py::array(self.size(), self.data<double>(), dummyDataOwner);
+                case kp::Tensor::TensorDataTypes::eBool:
+                    return py::array(self.size(), self.data<bool>(), dummyDataOwner);
+                default:
+                    throw std::runtime_error("Kompute Python data type not supported");
                 }
-                return reversed;
-            })
+            }, DOC(kp, Tensor, data))
         .def("size", &kp::Tensor::size, DOC(kp, Tensor, size))
         .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size))
         .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType))
+        .def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType))
         .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit))
         .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy));
 
-    py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence", DOC(kp, Sequence))
+    py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
         .def("record", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.record(op); },
                 DOC(kp, Sequence, record))
         .def("eval", [](kp::Sequence& self) { return self.eval(); },
                 DOC(kp, Sequence, eval))
         .def("eval", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.eval(op); },
-                DOC(kp, Sequence, eval))
+                DOC(kp, Sequence, eval_2))
         .def("eval_async", [](kp::Sequence& self) { return self.eval(); },
-                DOC(kp, Sequence, evalAsync))
+                DOC(kp, Sequence, evalAwait))
         .def("eval_async", [](kp::Sequence& self, std::shared_ptr<kp::OpBase> op) { return self.evalAsync(op); },
                 DOC(kp, Sequence, evalAsync))
         .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); },
                 DOC(kp, Sequence, evalAwait))
         .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); },
                 DOC(kp, Sequence, evalAwait))
+<<<<<<< HEAD
         .def("is_recording", &kp::Sequence::isRecording,
                 DOC(kp, Sequence, isRecording))
         .def("is_running", &kp::Sequence::isRunning,
@@ -163,6 +150,17 @@ PYBIND11_MODULE(kp, m) {
     py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager", DOC(kp, Manager))
         .def(py::init(), DOC(kp, Manager, Manager))
         .def(py::init<uint32_t>(), DOC(kp, Manager, Manager_2))
+=======
+        .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording))
+        .def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning))
+        .def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit))
+        .def("clear", &kp::Sequence::clear, DOC(kp, Sequence, clear))
+        .def("destroy", &kp::Sequence::destroy, DOC(kp, Sequence, destroy));
+
+    py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
+        .def(py::init())
+        .def(py::init<uint32_t>())
+>>>>>>> cc1a6cc (Updated tests and rebased)
         .def(py::init<uint32_t,const std::vector<uint32_t>&,const std::vector<std::string>&>(),
                 DOC(kp, Manager, Manager_2),
                 py::arg("device") = 0,
@@ -173,13 +171,44 @@ PYBIND11_MODULE(kp, m) {
         .def("tensor", [np](kp::Manager& self,
                             const py::array_t<float> data,
                             kp::Tensor::TensorTypes tensor_type) {
-                const py::array_t<float> flatdata = np.attr("ravel")(data);
-                const py::buffer_info info        = flatdata.request();
-                const float* ptr                  = (float*) info.ptr;
-                return self.tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type);
+                const py::buffer_info info        = data.request();
+                return self.tensor(
+                        info.ptr,
+                        data.size(),
+                        sizeof(float),
+                        kp::Tensor::TensorDataTypes::eFloat,
+                        tensor_type);
             },
             DOC(kp, Manager, tensor),
             py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
+        .def("tensor_t", [np](kp::Manager& self,
+                            const py::array data,
+                            kp::Tensor::TensorTypes tensor_type) {
+                // Creates a tensor whose data type follows the numpy dtype.
+                // TODO: confirm if np.ravel is required before reading the
+                // buffer; info.ptr is currently passed through unflattened.
+                const py::buffer_info info        = data.request();
+                if (data.dtype() == py::dtype::of<float>()) {
+                    return self.tensor(
+                            info.ptr, data.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
+                } else if (data.dtype() == py::dtype::of<std::uint32_t>()) {
+                    return self.tensor(
+                            info.ptr, data.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type);
+                } else if (data.dtype() == py::dtype::of<std::int32_t>()) {
+                    return self.tensor(
+                            info.ptr, data.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type);
+                } else if (data.dtype() == py::dtype::of<double>()) {
+                    return self.tensor(
+                            info.ptr, data.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type);
+                } else if (data.dtype() == py::dtype::of<bool>()) {
+                    return self.tensor(
+                            info.ptr, data.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type);
+                } else {
+                    throw std::runtime_error("Kompute Python no valid dtype supported");
+                }
+            },
+            DOC(kp, Manager, tensorT),
+            py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
         .def("algorithm", [](kp::Manager& self,
                              const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
                              const py::bytes& spirv,
diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py
index 0dab581c6..e8de68328 100644
--- a/python/test/test_array_multiplication.py
+++ b/python/test/test_array_multiplication.py
@@ -9,9 +9,9 @@ def test_array_multiplication():
     mgr = kp.Manager()
 
     # 2. Create Kompute Tensors to hold data
-    tensor_in_a = mgr.tensor([2, 2, 2])
-    tensor_in_b = mgr.tensor([1, 2, 3])
-    tensor_out = mgr.tensor([0, 0, 0])
+    tensor_in_a = mgr.tensor(np.array([2, 2, 2]))
+    tensor_in_b = mgr.tensor(np.array([1, 2, 3]))
+    tensor_out = mgr.tensor(np.array([0, 0, 0]))
 
     params = [tensor_in_a, tensor_in_b, tensor_out]
 
diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py
index 4bd0c28fa..862758413 100644
--- a/python/test/test_logistic_regression.py
+++ b/python/test/test_logistic_regression.py
@@ -1,4 +1,5 @@
 import pyshader as ps
+import numpy as np
 import kp
 
 def test_logistic_regression():
@@ -46,21 +47,21 @@ def test_logistic_regression():
     mgr = kp.Manager(0)
 
     # First we create input and ouput tensors for shader
-    tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
-    tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
+    tensor_x_i = mgr.tensor(np.array([0.0, 1.0, 1.0, 1.0, 1.0]))
+    tensor_x_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
 
-    tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])
+    tensor_y = mgr.tensor(np.array([0.0, 0.0, 0.0, 1.0, 1.0]))
 
-    tensor_w_in = mgr.tensor([0.001, 0.001])
-    tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
-    tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+    tensor_w_in = mgr.tensor(np.array([0.001, 0.001]))
+    tensor_w_out_i = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
+    tensor_w_out_j = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
 
-    tensor_b_in = mgr.tensor([0.0])
-    tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+    tensor_b_in = mgr.tensor(np.array([0.0]))
+    tensor_b_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
 
-    tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
+    tensor_l_out = mgr.tensor(np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
 
-    tensor_m = mgr.tensor([ tensor_y.size() ])
+    tensor_m = mgr.tensor(np.array([ tensor_y.size() ]))
 
     # We store them in an array for easier interaction
     params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
@@ -91,9 +92,9 @@ def test_logistic_regression():
 
         # Calculate the parameters based on the respective derivatives calculated
         for j_iter in range(tensor_b_out.size()):
-            tensor_w_in[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
-            tensor_w_in[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
-            tensor_b_in[0] -= learning_rate * tensor_b_out.data()[j_iter]
+            tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
+            tensor_w_in.data()[1] -= learning_rate * tensor_w_out_j.data()[j_iter]
+            tensor_b_in.data()[0] -= learning_rate * tensor_b_out.data()[j_iter]
 
     assert tensor_w_in.data()[0] < 0.01
     assert tensor_w_in.data()[0] > 0.0
diff --git a/setup.py b/setup.py
index ee3521064..733c4c185 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,7 @@ class CMakeBuild(build_ext):
         else:
             cmake_args += ['-DKOMPUTE_EXTRA_CXX_FLAGS="-fPIC"']
             cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
-            build_args += ['--', '-j2']
+            build_args += ['--', '-j']
 
         env = os.environ.copy()
         env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index df9549aab..572f0e4da 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -762,7 +762,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_sources(
+    static std::vector<uint32_t> compileSources(
       const std::vector<std::string>& sources,
       const std::vector<std::string>& files = {},
       const std::string& entryPoint = "main",
@@ -783,7 +783,7 @@ class Shader
      * GLSL compiler
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
-    static std::vector<uint32_t> compile_source(
+    static std::vector<uint32_t> compileSource(
       const std::string& source,
       const std::string& entryPoint = "main",
       std::vector<std::pair<std::string, std::string>> definitions = {},
@@ -981,7 +981,7 @@ class Tensor
     // TODO: move to cpp
     template <typename T>
     T* data() {
-        return this->mRawData;
+        return (T*)this->mRawData;
     }
 
     template <typename T>
@@ -1008,7 +1008,7 @@ class Tensor
     void* mRawData;
 
   private:
-    void rawMapData() {
+    void mapRawData() {
 
         KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
 
@@ -1025,12 +1025,36 @@ class Tensor
         }
 
         vk::DeviceSize bufferSize = this->memorySize();
+
         // Given we request coherent host memory we don't need to invalidate / flush
         this->mRawData = this->mDevice->mapMemory(
           *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+
         vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
     }
 
+    void unmapRawData() {
+        // Flush, then unmap, the host-visible memory backing this tensor.
+        KP_LOG_DEBUG("Kompute Tensor unmapping data from host buffer");
+
+        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+        if (this->mTensorType == TensorTypes::eHost) {
+            hostVisibleMemory = this->mPrimaryMemory; // host tensors use their primary memory
+        } else if (this->mTensorType == TensorTypes::eDevice) {
+            hostVisibleMemory = this->mStagingMemory; // device tensors use the staging memory
+        } else {
+            KP_LOG_WARN(
+              "Kompute Tensor unmapping data not supported on storage tensor");
+            return;
+        }
+
+        vk::DeviceSize bufferSize = this->memorySize();
+        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
+        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+        this->mDevice->unmapMemory(*hostVisibleMemory);
+    }
+
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;
@@ -2011,6 +2035,23 @@ class Manager
         return this->tensorT<float>(data, tensorType);
     }
 
+    std::shared_ptr<Tensor> tensor(
+      void* data,
+      uint32_t elementTotalCount,
+      uint32_t elementMemorySize,
+      const Tensor::TensorDataTypes& dataType,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        std::shared_ptr<Tensor> tensor{ new kp::Tensor(
+          this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index c39f5d6b5..6eb2042eb 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -98,6 +98,23 @@ class Manager
         return this->tensorT<float>(data, tensorType);
     }
 
+    std::shared_ptr<Tensor> tensor(
+      void* data,
+      uint32_t elementTotalCount,
+      uint32_t elementMemorySize,
+      const Tensor::TensorDataTypes& dataType,
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+    {
+        std::shared_ptr<Tensor> tensor{ new kp::Tensor(
+          this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
+
+        if (this->mManageResources) {
+            this->mManagedTensors.push_back(tensor);
+        }
+
+        return tensor;
+    }
+
     /**
      * Create a managed algorithm that will be destroyed by this manager
      * if it hasn't been destroyed by its reference count going to zero.
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index efc3cda18..0194e208f 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -190,7 +190,7 @@ class Tensor
     // TODO: move to cpp
     template <typename T>
     T* data() {
-        return this->mRawData;
+        return (T*)this->mRawData;
     }
 
     template <typename T>
diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
index 72eeaf72b..defd40998 100644
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@@ -78,14 +78,14 @@ TEST(TestDestroy, TestDestroyTensorVector)
               ->record<kp::OpTensorSyncLocal>(algo->getTensors())
               ->eval();
 
+            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
+            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
+
             tensorA->destroy();
             tensorB->destroy();
 
             EXPECT_FALSE(tensorA->isInit());
             EXPECT_FALSE(tensorB->isInit());
-
-            EXPECT_EQ(tensorA->vector(), std::vector<float>({ 2, 2, 2 }));
-            EXPECT_EQ(tensorB->vector(), std::vector<float>({ 3, 3, 3 }));
         }
     }
 }
diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index b934f7e83..effc75227 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -201,40 +201,3 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
     EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
 }
 
-TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
-{
-    std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
-
-    {
-        std::shared_ptr<kp::Sequence> sq = nullptr;
-        {
-            kp::Manager mgr;
-
-            tensorA = mgr.tensor({ 0, 0, 0 });
-
-            std::string shader(R"(
-              #version 450
-              layout (local_size_x = 1) in;
-              layout(set = 0, binding = 0) buffer a { float pa[]; };
-              void main() {
-                  uint index = gl_GlobalInvocationID.x;
-                  pa[index] = pa[index] + 1;
-              })");
-
-            std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
-
-            std::shared_ptr<kp::Algorithm> algorithm =
-              mgr.algorithm({ tensorA }, spirv);
-
-            sq = mgr.sequence();
-
-            sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
-
-            sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
-
-            sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
-        }
-    }
-
-    EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
-}

From 6a7f410675a8b7416f48dcfc458267a3c03e73e7 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 11:12:01 +0000
Subject: [PATCH 11/16] Updated to use flatdata on the python

---
 python/src/main.cpp | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/python/src/main.cpp b/python/src/main.cpp
index eab8e5ef4..a82cd160d 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -169,12 +169,13 @@ PYBIND11_MODULE(kp, m) {
         .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence),
                 py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
         .def("tensor", [np](kp::Manager& self,
-                            const py::array_t<float> data,
+                            const py::array_t<float>& data,
                             kp::Tensor::TensorTypes tensor_type) {
-                const py::buffer_info info        = data.request();
+                const py::array_t<float>& flatdata = np.attr("ravel")(data);
+                const py::buffer_info info        = flatdata.request();
                 return self.tensor(
                         info.ptr,
-                        data.size(),
+                        flatdata.size(),
                         sizeof(float),
                         kp::Tensor::TensorDataTypes::eFloat,
                         tensor_type);
@@ -182,27 +183,26 @@ PYBIND11_MODULE(kp, m) {
             DOC(kp, Manager, tensor),
             py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
         .def("tensor_t", [np](kp::Manager& self,
-                            const py::array data,
+                            const py::array& data,
                             kp::Tensor::TensorTypes tensor_type) {
-                // TODO: confirm if ravel is required as numpy data is always flat
-                //const py::array_t<float> flatdata = np.attr("ravel")(data);
-                //const py::buffer_info info        = flatdata.request();
-                const py::buffer_info info        = data.request();
-                if (data.dtype() == py::dtype::of<std::float_t>()) {
+                // TODO: Suppport strides in numpy format
+                const py::array_t<float>& flatdata = np.attr("ravel")(data);
+                const py::buffer_info info        = flatdata.request();
+                if (flatdata.dtype() == py::dtype::of<std::float_t>()) {
                     return self.tensor(
-                            info.ptr, data.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
-                } else if (data.dtype() == py::dtype::of<std::uint32_t>()) {
+                            info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
+                } else if (flatdata.dtype() == py::dtype::of<std::uint32_t>()) {
                     return self.tensor(
-                            info.ptr, data.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type);
-                } else if (data.dtype() == py::dtype::of<std::int32_t>()) {
+                            info.ptr, flatdata.size(), sizeof(uint32_t), kp::Tensor::TensorDataTypes::eUnsignedInt, tensor_type);
+                } else if (flatdata.dtype() == py::dtype::of<std::int32_t>()) {
                     return self.tensor(
-                            info.ptr, data.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type);
-                } else if (data.dtype() == py::dtype::of<std::double_t>()) {
+                            info.ptr, flatdata.size(), sizeof(int32_t), kp::Tensor::TensorDataTypes::eInt, tensor_type);
+                } else if (flatdata.dtype() == py::dtype::of<std::double_t>()) {
                     return self.tensor(
-                            info.ptr, data.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type);
-                } else if (data.dtype() == py::dtype::of<bool>()) {
+                            info.ptr, flatdata.size(), sizeof(double), kp::Tensor::TensorDataTypes::eDouble, tensor_type);
+                } else if (flatdata.dtype() == py::dtype::of<bool>()) {
                     return self.tensor(
-                            info.ptr, data.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type);
+                            info.ptr, flatdata.size(), sizeof(bool), kp::Tensor::TensorDataTypes::eBool, tensor_type);
                 } else {
                     throw std::runtime_error("Kompute Python no valid dtype supported");
                 }

From 8abb2313d0d08010226a83100c3fdda5bcb2a89f Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 12:16:25 +0000
Subject: [PATCH 12/16] Updated python and cpp end to end test and readme to
 show support for different types on tensor

---
 README.md                           | 30 +++++++++++++++++------------
 python/src/main.cpp                 |  5 ++++-
 python/test/test_kompute.py         | 13 +++++++------
 test/TestMultipleAlgoExecutions.cpp | 18 +++++++++--------
 4 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 41596cb00..7a7375a6a 100644
--- a/README.md
+++ b/README.md
@@ -55,10 +55,13 @@ void kompute(const std::string& shader) {
     kp::Manager mgr; 
 
     // 2. Create and initialise Kompute Tensors through manager
+
+    // Default tensor constructor simplifies creation of float values
     auto tensorInA = mgr.tensor({ 2., 2., 2. });
     auto tensorInB = mgr.tensor({ 1., 2., 3. });
-    auto tensorOutA = mgr.tensor({ 0., 0., 0. });
-    auto tensorOutB = mgr.tensor({ 0., 0., 0. });
+    // Explicit type constructor supports uint32, int32, double, float and bool
+    auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
+    auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
 
@@ -109,8 +112,8 @@ int main() {
         // The input tensors bind index is relative to index in parameter passed
         layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
         layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
-        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
-        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
 
         // Kompute supports push constants updated on dispatch
         layout(push_constant) uniform PushConstants {
@@ -122,8 +125,8 @@ int main() {
 
         void main() {
             uint index = gl_GlobalInvocationID.x;
-            out_a[index] += in_a[index] * in_b[index];
-            out_b[index] += const_one * push_const.val;
+            out_a[index] += uint( in_a[index] * in_b[index] );
+            out_b[index] += uint( const_one * push_const.val );
         }
     )");
 
@@ -144,10 +147,13 @@ def kompute(shader):
     mgr = kp.Manager()
 
     # 2. Create and initialise Kompute Tensors through manager
+
+    # Default tensor constructor simplifies creation of float values
     tensor_in_a = mgr.tensor([2, 2, 2])
     tensor_in_b = mgr.tensor([1, 2, 3])
-    tensor_out_a = mgr.tensor([0, 0, 0])
-    tensor_out_b = mgr.tensor([0, 0, 0])
+    # Explicit type constructor supports uint32, int32, double, float and bool
+    tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
+    tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
 
     params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
 
@@ -194,8 +200,8 @@ if __name__ == "__main__":
         // The input tensors bind index is relative to index in parameter passed
         layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
         layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
-        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
-        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
 
         // Kompute supports push constants updated on dispatch
         layout(push_constant) uniform PushConstants {
@@ -207,8 +213,8 @@ if __name__ == "__main__":
 
         void main() {
             uint index = gl_GlobalInvocationID.x;
-            out_a[index] += in_a[index] * in_b[index];
-            out_b[index] += const_one * push_const.val;
+            out_a[index] += uint( in_a[index] * in_b[index] );
+            out_b[index] += uint( const_one * push_const.val );
         }
     """
 
diff --git a/python/src/main.cpp b/python/src/main.cpp
index a82cd160d..495d0ed0c 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -173,6 +173,7 @@ PYBIND11_MODULE(kp, m) {
                             kp::Tensor::TensorTypes tensor_type) {
                 const py::array_t<float>& flatdata = np.attr("ravel")(data);
                 const py::buffer_info info        = flatdata.request();
+                KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor float with data size {}", flatdata.size());
                 return self.tensor(
                         info.ptr,
                         flatdata.size(),
@@ -186,8 +187,10 @@ PYBIND11_MODULE(kp, m) {
                             const py::array& data,
                             kp::Tensor::TensorTypes tensor_type) {
                 // TODO: Suppport strides in numpy format
-                const py::array_t<float>& flatdata = np.attr("ravel")(data);
+                const py::array& flatdata = np.attr("ravel")(data);
                 const py::buffer_info info        = flatdata.request();
+                KP_LOG_DEBUG("Kompute Python Manager creating tensor_T with data size {} dtype {}",
+                        flatdata.size(), std::string(py::str(flatdata.dtype())));
                 if (flatdata.dtype() == py::dtype::of<std::float_t>()) {
                     return self.tensor(
                             info.ptr, flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, tensor_type);
diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py
index 47887930a..736768053 100644
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@@ -36,8 +36,9 @@ def test_end_to_end():
 
     tensor_in_a = mgr.tensor([2, 2, 2])
     tensor_in_b = mgr.tensor([1, 2, 3])
-    tensor_out_a = mgr.tensor([0, 0, 0])
-    tensor_out_b = mgr.tensor([0, 0, 0])
+    # Explicit type constructor supports uint32, int32, double, float and bool
+    tensor_out_a = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
+    tensor_out_b = mgr.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
 
     params = [tensor_in_a, tensor_in_b, tensor_out_a, tensor_out_b]
 
@@ -49,8 +50,8 @@ def test_end_to_end():
         // The input tensors bind index is relative to index in parameter passed
         layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
         layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
-        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
-        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
 
         // Kompute supports push constants updated on dispatch
         layout(push_constant) uniform PushConstants {
@@ -62,8 +63,8 @@ def test_end_to_end():
 
         void main() {
             uint index = gl_GlobalInvocationID.x;
-            out_a[index] += in_a[index] * in_b[index];
-            out_b[index] += const_one * push_const.val;
+            out_a[index] += uint( in_a[index] * in_b[index] );
+            out_b[index] += uint( const_one * push_const.val );
         }
     """
 
diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp
index effc75227..f9e066f47 100644
--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@@ -8,10 +8,12 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
 
     kp::Manager mgr;
 
+    // Default tensor constructor simplifies creation of float values
     auto tensorInA = mgr.tensor({ 2., 2., 2. });
     auto tensorInB = mgr.tensor({ 1., 2., 3. });
-    auto tensorOutA = mgr.tensor({ 0., 0., 0. });
-    auto tensorOutB = mgr.tensor({ 0., 0., 0. });
+    // Explicit type constructor supports uint32, int32, double, float and bool
+    auto tensorOutA = mgr.tensorT<uint32_t>({ 0, 0, 0 });
+    auto tensorOutB = mgr.tensorT<uint32_t>({ 0, 0, 0 });
 
     std::string shader = (R"(
         #version 450
@@ -21,8 +23,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
         // The input tensors bind index is relative to index in parameter passed
         layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
         layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
-        layout(set = 0, binding = 2) buffer buf_out_a { float out_a[]; };
-        layout(set = 0, binding = 3) buffer buf_out_b { float out_b[]; };
+        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
+        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };
 
         // Kompute supports push constants updated on dispatch
         layout(push_constant) uniform PushConstants {
@@ -34,8 +36,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
 
         void main() {
             uint index = gl_GlobalInvocationID.x;
-            out_a[index] += in_a[index] * in_b[index];
-            out_b[index] += const_one * push_const.val;
+            out_a[index] += uint( in_a[index] * in_b[index] );
+            out_b[index] += uint( const_one * push_const.val );
         }
     )");
 
@@ -64,8 +66,8 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
 
     sq->evalAwait();
 
-    EXPECT_EQ(tensorOutA->vector(), std::vector<float>({ 4, 8, 12 }));
-    EXPECT_EQ(tensorOutB->vector(), std::vector<float>({ 10, 10, 10 }));
+    EXPECT_EQ(tensorOutA->vector(), std::vector<uint32_t>({ 4, 8, 12 }));
+    EXPECT_EQ(tensorOutB->vector(), std::vector<uint32_t>({ 10, 10, 10 }));
 }
 
 TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)

From df0dfd351f41f93884baa166f922c4b77d10a42b Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 13:37:54 +0000
Subject: [PATCH 13/16] Added types tests

---
 python/test/test_kompute.py      |  21 ----
 python/test/test_tensor_types.py | 206 +++++++++++++++++++++++++++++++
 src/Tensor.cpp                   |   2 +-
 3 files changed, 207 insertions(+), 22 deletions(-)
 create mode 100644 python/test/test_tensor_types.py

diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py
index 736768053..e1bcee940 100644
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@@ -9,27 +9,6 @@ DIRNAME = os.path.dirname(os.path.abspath(__file__))
 
 kp_log = logging.getLogger("kp")
 
-# TODO: Add example with file
-#def test_opalgobase_file():
-#    """
-#    Test basic OpMult operation
-#    """
-#
-#    tensor_in_a = kp.Tensor([2, 2, 2])
-#    tensor_in_b = kp.Tensor([1, 2, 3])
-#    tensor_out = kp.Tensor([0, 0, 0])
-#
-#    mgr = kp.Manager()
-#    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])
-#
-#    shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
-#
-#    mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shader_path)
-#
-#    mgr.eval_tensor_sync_local_def([tensor_out])
-#
-#    assert tensor_out.data() == [2.0, 4.0, 6.0]
-
 def test_end_to_end():
 
     mgr = kp.Manager()
diff --git a/python/test/test_tensor_types.py b/python/test/test_tensor_types.py
new file mode 100644
index 000000000..b1d90fe03
--- /dev/null
+++ b/python/test/test_tensor_types.py
@@ -0,0 +1,206 @@
+import pyshader as ps
+import os
+import pytest
+import kp
+import numpy as np
+
+
+def test_type_float():
+
+    shader = """
+        #version 450
+        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
+        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
+        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
+        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+        void main()
+        {
+            uint index = gl_GlobalInvocationID.x;
+            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
+        }
+    """
+
+    spirv = kp.Shader.compile_source(shader)
+
+    arr_in_a = np.array([123., 153., 231.], dtype=np.float32)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.float32)
+    arr_out = np.array([0, 0, 0], dtype=np.float32)
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor(arr_in_a)
+    tensor_in_b = mgr.tensor(arr_in_b)
+    tensor_out = mgr.tensor(arr_out)
+
+    params = [tensor_in_a, tensor_in_b, tensor_out]
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpTensorSyncLocal([tensor_out]))
+        .eval())
+
+    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
+
+
+def test_type_float_double_incorrect():
+
+    shader = """
+        #version 450
+        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
+        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
+        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
+        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+        void main()
+        {
+            uint index = gl_GlobalInvocationID.x;
+            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
+        }
+    """
+
+    spirv = kp.Shader.compile_source(shader)
+
+    arr_in_a = np.array([123., 153., 231.], dtype=np.float32)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32)
+    arr_out = np.array([0, 0, 0], dtype=np.float32)
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor_t(arr_in_a)
+    tensor_in_b = mgr.tensor_t(arr_in_b)
+    tensor_out = mgr.tensor_t(arr_out)
+
+    params = [tensor_in_a, tensor_in_b, tensor_out]
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpTensorSyncLocal([tensor_out]))
+        .eval())
+
+    assert np.all(tensor_out.data() != arr_in_a * arr_in_b)
+
+@pytest.mark.skipif("swiftshader" in os.environ.get("VK_ICD_FILENAMES", ""),  # "" default: an unset variable must not raise TypeError at import
+                    reason="Swiftshader doesn't support double")
+def test_type_double():
+    """Multiply two float64 tensors element-wise in a shader and compare the output with numpy."""
+    shader = """
+        #version 450
+        layout(set = 0, binding = 0) buffer tensorLhs { double valuesLhs[]; };
+        layout(set = 0, binding = 1) buffer tensorRhs { double valuesRhs[]; };
+        layout(set = 0, binding = 2) buffer tensorOutput { double valuesOutput[]; };
+        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+        void main()
+        {
+            uint index = gl_GlobalInvocationID.x;
+            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
+        }
+    """
+
+    spirv = kp.Shader.compile_source(shader)
+
+    arr_in_a = np.array([123., 153., 231.], dtype=np.float64)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.float64)
+    arr_out = np.array([0, 0, 0], dtype=np.float64)
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor_t(arr_in_a)
+    tensor_in_b = mgr.tensor_t(arr_in_b)
+    tensor_out = mgr.tensor_t(arr_out)
+
+    params = [tensor_in_a, tensor_in_b, tensor_out]
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpTensorSyncLocal([tensor_out]))
+        .eval())
+
+    print(f"Dtype value {tensor_out.data().dtype}")
+
+    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
+
+def test_type_int():
+
+    shader = """
+        #version 450
+        layout(set = 0, binding = 0) buffer tensorLhs { int valuesLhs[]; };
+        layout(set = 0, binding = 1) buffer tensorRhs { int valuesRhs[]; };
+        layout(set = 0, binding = 2) buffer tensorOutput { int valuesOutput[]; };
+        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+        void main()
+        {
+            uint index = gl_GlobalInvocationID.x;
+            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
+        }
+    """
+
+    spirv = kp.Shader.compile_source(shader)
+
+    arr_in_a = np.array([123, 153, 231], dtype=np.int32)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.int32)
+    arr_out = np.array([0, 0, 0], dtype=np.int32)
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor_t(arr_in_a)
+    tensor_in_b = mgr.tensor_t(arr_in_b)
+    tensor_out = mgr.tensor_t(arr_out)
+
+    params = [tensor_in_a, tensor_in_b, tensor_out]
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpTensorSyncLocal([tensor_out]))
+        .eval())
+
+    print(f"Dtype value {tensor_out.data().dtype}")
+
+    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
+
+def test_type_unsigned_int():
+
+    shader = """
+        #version 450
+        layout(set = 0, binding = 0) buffer tensorLhs { uint valuesLhs[]; };
+        layout(set = 0, binding = 1) buffer tensorRhs { uint valuesRhs[]; };
+        layout(set = 0, binding = 2) buffer tensorOutput { uint valuesOutput[]; };
+        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+        void main()
+        {
+            uint index = gl_GlobalInvocationID.x;
+            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
+        }
+    """
+
+    spirv = kp.Shader.compile_source(shader)
+
+    arr_in_a = np.array([123, 153, 231], dtype=np.uint32)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32)
+    arr_out = np.array([0, 0, 0], dtype=np.uint32)
+
+    mgr = kp.Manager()
+
+    tensor_in_a = mgr.tensor_t(arr_in_a)
+    tensor_in_b = mgr.tensor_t(arr_in_b)
+    tensor_out = mgr.tensor_t(arr_out)
+
+    params = [tensor_in_a, tensor_in_b, tensor_out]
+
+    (mgr.sequence()
+        .record(kp.OpTensorSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpTensorSyncLocal([tensor_out]))
+        .eval())
+
+    print(f"Dtype value {tensor_out.data().dtype}")
+
+    assert np.all(tensor_out.data() == arr_in_a * arr_in_b)
+
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 8b96be163..947714693 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -175,7 +175,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
 vk::DescriptorBufferInfo
 Tensor::constructDescriptorBufferInfo()
 {
-    KP_LOG_WARN("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
+    KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
     vk::DeviceSize bufferSize = this->memorySize();
     return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
                                     0, // offset

From 5ff7b4aa7821c5d1142a5e78ba67fc4027ad311a Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 14:10:38 +0000
Subject: [PATCH 14/16] Added single header

---
 single_include/kompute/Kompute.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 572f0e4da..9b41e1ead 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -2001,7 +2001,7 @@ class Manager
      * If zero (default), disables latching of timestamps.
      * @returns Shared pointer with initialised sequence
      */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
+    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
 
     /**
      * Create a managed tensor that will be destroyed by this manager

From 6fd19b9d05fb2de7fbc545f4f7144266f98c98d1 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 14:11:32 +0000
Subject: [PATCH 15/16] Fixed conflicts

---
 python/src/docstrings.hpp | 75 +++++++++++++++++++++++++++------------
 python/src/main.cpp       | 12 -------
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp
index a5bda0a4d..d4593edb8 100644
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@@ -252,7 +252,11 @@ nrOfTimestamps The maximum number of timestamps to allocate. If zero
 (default), disables latching of timestamps. @returns Shared pointer
 with initialised sequence)doc";
 
-static const char *__doc_kp_Manager_tensor =
+static const char *__doc_kp_Manager_tensor = R"doc()doc";
+
+static const char *__doc_kp_Manager_tensor_2 = R"doc()doc";
+
+static const char *__doc_kp_Manager_tensorT =
 R"doc(Create a managed tensor that will be destroyed by this manager if it
 hasn't been destroyed by its reference count going to zero.
 
@@ -679,6 +683,20 @@ across GPUs. Each tensor would have a respective Vulkan memory and
 buffer, which would be used to store their respective data. The
 tensors can be used for GPU data storage or transfer.)doc";
 
+static const char *__doc_kp_TensorT = R"doc()doc";
+
+static const char *__doc_kp_TensorT_TensorT = R"doc()doc";
+
+static const char *__doc_kp_TensorT_data = R"doc()doc";
+
+static const char *__doc_kp_TensorT_dataType = R"doc()doc";
+
+static const char *__doc_kp_TensorT_operator_array = R"doc()doc";
+
+static const char *__doc_kp_TensorT_setData = R"doc()doc";
+
+static const char *__doc_kp_TensorT_vector = R"doc()doc";
+
 static const char *__doc_kp_Tensor_Tensor =
 R"doc(Constructor with data provided which would be used to create the
 respective vulkan buffer and memory.
@@ -689,6 +707,18 @@ respective vulkan buffer and memory.
 tensor @param tensorTypes Type for the tensor which is of type
 TensorTypes)doc";
 
+static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc";
+
+static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc";
+
+static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc";
+
+static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc";
+
+static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc";
+
+static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc";
+
 static const char *__doc_kp_Tensor_TensorTypes =
 R"doc(Type for tensors created: Device allows memory to be transferred from
 staging buffers. Staging are host memory visible. Storage are device
@@ -714,13 +744,14 @@ without exposing it.
 
 static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_data =
-R"doc(Returns the vector of data currently contained by the Tensor. It is
-important to ensure that there is no out-of-sync data with the GPU
-memory.
+static const char *__doc_kp_Tensor_data = R"doc()doc";
 
-@return Reference to vector of elements representing the data in the
-tensor.)doc";
+static const char *__doc_kp_Tensor_dataType =
+R"doc(Retrieve the underlying data type of the Tensor
+
+@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc";
+
+static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc";
 
 static const char *__doc_kp_Tensor_destroy =
 R"doc(Destroys and frees the GPU resources which include the buffer and
@@ -740,7 +771,9 @@ resources.
 
 @returns Boolean stating whether tensor is initialized)doc";
 
-static const char *__doc_kp_Tensor_mData = R"doc()doc";
+static const char *__doc_kp_Tensor_mDataType = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
 
@@ -758,29 +791,21 @@ static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
 
+static const char *__doc_kp_Tensor_mRawData = R"doc()doc";
+
+static const char *__doc_kp_Tensor_mSize = R"doc()doc";
+
 static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mapDataFromHostMemory =
-R"doc(Maps data from the Host Visible GPU memory into the data vector. It
-requires the Tensor to be of staging type for it to work.)doc";
-
-static const char *__doc_kp_Tensor_mapDataIntoHostMemory =
-R"doc(Maps data from the data vector into the Host Visible GPU memory. It
-requires the tensor to be of staging type for it to work.)doc";
+static const char *__doc_kp_Tensor_mapRawData = R"doc()doc";
 
 static const char *__doc_kp_Tensor_memorySize = R"doc()doc";
 
-static const char *__doc_kp_Tensor_operator_array =
-R"doc(Overrides the subscript operator to expose the underlying data's
-subscript operator which in this case would be its underlying
-vector's.
-
-@param i The index where the element will be returned from. @return
-Returns the element in the position requested.)doc";
+static const char *__doc_kp_Tensor_rawData = R"doc()doc";
 
 static const char *__doc_kp_Tensor_rebuild =
 R"doc(Function to trigger reinitialisation of the tensor buffer and memory
@@ -829,7 +854,7 @@ would only be relevant for kp::Tensors of type eDevice.
 @param createBarrier Whether to create a barrier that ensures the data
 is copied before further operations. Default is true.)doc";
 
-static const char *__doc_kp_Tensor_setData =
+static const char *__doc_kp_Tensor_setRawData =
 R"doc(Sets / resets the vector data of the tensor. This function does not
 perform any copies into GPU memory and is only performed on the host.)doc";
 
@@ -844,6 +869,10 @@ R"doc(Retrieve the tensor type of the Tensor
 
 @return Tensor type of tensor)doc";
 
+static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc";
+
+static const char *__doc_kp_Tensor_vector = R"doc()doc";
+
 #if defined(__GNUG__)
 #pragma GCC diagnostic pop
 #endif
diff --git a/python/src/main.cpp b/python/src/main.cpp
index 495d0ed0c..9e065c213 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -131,7 +131,6 @@ PYBIND11_MODULE(kp, m) {
                 DOC(kp, Sequence, evalAwait))
         .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); },
                 DOC(kp, Sequence, evalAwait))
-<<<<<<< HEAD
         .def("is_recording", &kp::Sequence::isRecording,
                 DOC(kp, Sequence, isRecording))
         .def("is_running", &kp::Sequence::isRunning,
@@ -150,17 +149,6 @@ PYBIND11_MODULE(kp, m) {
     py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager", DOC(kp, Manager))
         .def(py::init(), DOC(kp, Manager, Manager))
         .def(py::init<uint32_t>(), DOC(kp, Manager, Manager_2))
-=======
-        .def("is_recording", &kp::Sequence::isRecording, DOC(kp, Sequence, isRecording))
-        .def("is_running", &kp::Sequence::isRunning, DOC(kp, Sequence, isRunning))
-        .def("is_init", &kp::Sequence::isInit, DOC(kp, Sequence, isInit))
-        .def("clear", &kp::Sequence::clear, DOC(kp, Sequence, clear))
-        .def("destroy", &kp::Sequence::destroy, DOC(kp, Sequence, destroy));
-
-    py::class_<kp::Manager, std::shared_ptr<kp::Manager>>(m, "Manager")
-        .def(py::init())
-        .def(py::init<uint32_t>())
->>>>>>> cc1a6cc (Updated tests and rebased)
         .def(py::init<uint32_t,const std::vector<uint32_t>&,const std::vector<std::string>&>(),
                 DOC(kp, Manager, Manager_2),
                 py::arg("device") = 0,

From 2e1022410baf5c8971b9c4b97c33f53d2cc31181 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 7 Mar 2021 14:20:31 +0000
Subject: [PATCH 16/16] Updated compile_source to compileSource

---
 test/TestSequence.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp
index 090a6317b..ca3b9a485 100644
--- a/test/TestSequence.cpp
+++ b/test/TestSequence.cpp
@@ -117,7 +117,7 @@ TEST(TestSequence, SequenceTimestamps)
           pa[index] = pa[index] + 1;
       })");
 
-    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
     
     auto seq = mgr.sequence(0, 100); //100 timestamps
     seq->record<kp::OpTensorSyncDevice>({ tensorA })