From 4fddf74ca7092edfc9fe5e46b45a225eec1109f2 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sun, 28 Feb 2021 17:07:17 +0000
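Subject: [PATCH] Updated examples

Port the examples to the Manager-owned tensor/algorithm API:

* tensors are created with mgr.tensor() instead of `new kp::Tensor`
  followed by mgr.rebuild()
* kp::OpAlgoCreate is replaced by mgr.algorithm() plus kp::OpAlgoDispatch
* sequences are built by chaining record<>() calls; the explicit
  begin()/end() calls are removed
* the KomputeModelML classes keep their weights and bias as
  std::shared_ptr<kp::Tensor>

The single-include header is also updated: the KP_LOG_* macros are
reformatted and the Android variants now call __android_log_write() with
the formatted string's c_str().
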
---
 .../app/src/main/cpp/KomputeModelML.cpp       |  95 ++--
 .../app/src/main/cpp/KomputeModelML.hpp       |   5 +-
 examples/array_multiplication/src/Main.cpp    |  11 +-
 .../kompute_summator/KomputeSummatorNode.cpp  |  13 +-
 .../kompute_model_ml/KomputeModelMLNode.cpp   |  83 ++--
 .../gdnative_shared/src/KomputeModelML.cpp    | 101 ++--
 .../gdnative_shared/src/KomputeModelML.hpp    |   4 +-
 examples/logistic_regression/src/Main.cpp     |  45 +-
 single_include/kompute/Kompute.hpp            | 439 ++++++++++--------
 src/include/kompute/Core.hpp                  |  16 +-
 src/include/kompute/operations/OpBase.hpp     |   1 -
 11 files changed, 408 insertions(+), 405 deletions(-)
diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
index f1884760a..647cd5236 100755
--- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
+++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
@@ -20,61 +20,62 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
     uint32_t ITERATIONS = 100;
     float learningRate = 0.1;
 
-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
-
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
-
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
-
-    std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
-                                                        wIn, wOutI, wOutJ,
-                                                        bIn, bOut,  lOut };
-
     {
         kp::Manager mgr;
 
-        {
-            mgr.rebuild(params);
+        std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
+        std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
 
-            std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+        std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
 
-            // Record op algo base
-            sq->begin();
+        std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
+        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
+        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
 
-            sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
+        std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
+        std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
 
-            // Newer versions of Android are able to use shaderc to read raw string
-            sq->record<kp::OpAlgoCreate>(
-                    params, kp::Shader::compile_source(LR_SHADER));
+        std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
 
-            sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
+        std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
+                                                            wIn, wOutI, wOutJ,
+                                                            bIn, bOut,  lOut };
 
-            sq->end();
+        std::vector<uint32_t> spirv(
+                    (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
+                    (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+                        + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
-            // Iterate across all expected iterations
-            for (size_t i = 0; i < ITERATIONS; i++) {
+        std::shared_ptr<kp::Algorithm> algo =
+                mgr.algorithm(params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
 
-                sq->eval();
+        mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
-                for (size_t j = 0; j < bOut->size(); j++) {
-                    wIn->data()[0] -= learningRate * wOutI->data()[j];
-                    wIn->data()[1] -= learningRate * wOutJ->data()[j];
-                    bIn->data()[0] -= learningRate * bOut->data()[j];
-                }
+        std::shared_ptr<kp::Sequence> sq = mgr.sequence()
+            ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+            ->record<kp::OpAlgoDispatch>(algo)
+            ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
+
+        // Iterate across all expected iterations
+        for (size_t i = 0; i < ITERATIONS; i++) {
+
+            sq->eval();
+
+            for (size_t j = 0; j < bOut->size(); j++) {
+                wIn->data()[0] -= learningRate * wOutI->data()[j];
+                wIn->data()[1] -= learningRate * wOutJ->data()[j];
+                bIn->data()[0] -= learningRate * bOut->data()[j];
             }
         }
-    }
 
-    this->mWeights = kp::Tensor(wIn->data());
-    this->mBias = kp::Tensor(bIn->data());
+        KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
+        KP_LOG_INFO("{}", wIn->data()[0]);
+        KP_LOG_INFO("{}", wIn->data()[1]);
+        KP_LOG_INFO("{}", bIn->data()[0]);
+
+        this->mWeights = wIn;
+        this->mBias = bIn;
+    }
 }
 
 std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ) {
@@ -88,9 +89,9 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
     for (size_t i = 0; i < xI.size(); i++) {
         float xIVal = xI[i];
         float xJVal = xJ[i];
-        float result = (xIVal * this->mWeights.data()[0]
-                        + xJVal * this->mWeights.data()[1]
-                        + this->mBias.data()[0]);
+        float result = (xIVal * this->mWeights->data()[0]
+                        + xJVal * this->mWeights->data()[1]
+                        + this->mBias->data()[0]);
 
         // Instead of using sigmoid we'll just return full numbers
         float var = result > 0 ? 1 : 0;
@@ -103,13 +104,13 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
 std::vector<float> KomputeModelML::get_params() {
     std::vector<float> retVector;
 
-    if(this->mWeights.size() + this->mBias.size() == 0) {
+    if(!this->mWeights || !this->mBias || this->mWeights->size() + this->mBias->size() == 0) { // shared_ptr members may still be null if train() has not run
         return retVector;
     }
 
-    retVector.push_back(this->mWeights.data()[0]);
-    retVector.push_back(this->mWeights.data()[1]);
-    retVector.push_back(this->mBias.data()[0]);
+    retVector.push_back(this->mWeights->data()[0]);
+    retVector.push_back(this->mWeights->data()[1]);
+    retVector.push_back(this->mBias->data()[0]);
     retVector.push_back(99.0);
 
     return retVector;
diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp
index 335f05805..093edbafc 100755
--- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp
+++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp
@@ -4,6 +4,7 @@
 
 #include <vector>
 #include <string>
+#include <memory>
 
 #include "kompute/Kompute.hpp"
 
@@ -20,8 +21,8 @@ public:
     std::vector<float> get_params();
 
 private:
-    kp::Tensor mWeights;
-    kp::Tensor mBias;
+    std::shared_ptr<kp::Tensor> mWeights;
+    std::shared_ptr<kp::Tensor> mBias;
 
 };
 
diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp
index 8ec611e15..fd823bca8 100755
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@@ -37,11 +37,14 @@ int main()
         }
       )");
 
-    mgr.evalOpDefault<kp::OpAlgoCreate>(
-            { tensorInA, tensorInB, tensorOut },
-            kp::Shader::compile_source(shader));
+    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
 
-    mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorOut});
+    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, kp::Shader::compile_source(shader));
+
+    mgr.sequence()->record<kp::OpTensorSyncDevice>(params)
+        ->record<kp::OpAlgoDispatch>(algo)
+        ->record<kp::OpTensorSyncLocal>(params)
+        ->eval(); // record() only queues the operations; nothing runs until eval()
 
     // prints "Output {  0  4  12  }"
     std::cout<< "Output: {  ";
diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
index 2e9f1bc00..f50c56d5c 100644
--- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
+++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
@@ -31,7 +31,7 @@ void KomputeSummatorNode::_init() {
     std::cout << "CALLING INIT" << std::endl;
     this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
     this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
-    this->mSequence = this->mManager.sequence("AdditionSeq");
+    this->mSequence = this->mManager.sequence();
 
     // We now record the steps in the sequence
     if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
@@ -51,7 +51,11 @@ void KomputeSummatorNode::_init() {
             }
         )");
 
-        sq->begin();
+        std::shared_ptr<kp::Algorithm> algo = // built once here, dispatched by the recorded sequence
+          this->mManager.algorithm(
+                { this->mPrimaryTensor, this->mSecondaryTensor },
+                kp::Shader::compile_source(shader));
+
 
         // First we ensure secondary tensor loads to GPU
         // No need to sync the primary tensor as it should not be changed
@@ -59,15 +63,12 @@ void KomputeSummatorNode::_init() {
                 { this->mSecondaryTensor });
 
         // Then we run the operation with both tensors
-        sq->record<kp::OpAlgoCreate>(
-            { this->mPrimaryTensor, this->mSecondaryTensor }, 
-            kp::Shader::compile_source(shader));
+        sq->record<kp::OpAlgoDispatch>(algo);
 
         // We map the result back to local 
         sq->record<kp::OpTensorSyncLocal>(
                 { this->mPrimaryTensor });
 
-        sq->end();
     }
     else {
         throw std::runtime_error("Sequence pointer no longer available");
diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp
index 57490a8d4..081315a4b 100644
--- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp
+++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp
@@ -29,54 +29,41 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
     uint32_t ITERATIONS = 100;
     float learningRate = 0.1;
 
-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
-
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
-
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
-
-    std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
-                                                        wIn, wOutI, wOutJ,
-                                                        bIn, bOut,  lOut };
-
     {
         kp::Manager mgr;
 
-        mgr.rebuild(params);
+        std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
+        std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
+
+        std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
+
+        std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
+        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
+        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
+
+        std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
+        std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
+
+        std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
+
+        std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
+                                                            wIn, wOutI, wOutJ,
+                                                            bIn, bOut,  lOut };
 
         {
-            std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+            std::vector<uint32_t> spirv(
+                        (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
+                        (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+                            + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
-            // Record op algo base
-            sq->begin();
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
 
-            sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
+            mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
-#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
-            // Newer versions of Android are able to use shaderc to read raw string
-            sq->record<kp::OpAlgoCreate>(
-                    params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
-#else
-            // Older versions of Android require the SPIRV binary directly
-            sq->record<kp::OpAlgoCreate>(
-                    params, std::vector<char>(
-                            kp::shader_data::shaders_glsl_logisticregression_comp_spv,
-                            kp::shader_data::shaders_glsl_logisticregression_comp_spv
-                                + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len
-                    ));
-#endif
-
-            sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
-
-            sq->end();
+            std::shared_ptr<kp::Sequence> sq = mgr.sequence()
+                ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+                ->record<kp::OpAlgoDispatch>(algo)
+                ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
 
             // Iterate across all expected iterations
             for (size_t i = 0; i < ITERATIONS; i++) {
@@ -90,15 +77,15 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
                 }
             }
         }
+
+        KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
+        KP_LOG_INFO("{}", wIn->data()[0]);
+        KP_LOG_INFO("{}", wIn->data()[1]);
+        KP_LOG_INFO("{}", bIn->data()[0]);
+
+        this->mWeights = kp::Tensor(wIn->data()); // NOTE(review): the gdnative_shared variant now stores the shared_ptr itself — confirm kp::Tensor can still be built from raw data here
+        this->mBias = kp::Tensor(bIn->data());
     }
-
-    KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
-    KP_LOG_INFO(wIn->data()[0]);
-    KP_LOG_INFO(wIn->data()[1]);
-    KP_LOG_INFO(bIn->data()[0]);
-
-    this->mWeights = kp::Tensor(wIn->data());
-    this->mBias = kp::Tensor(bIn->data());
 }
 
 Array KomputeModelMLNode::predict(Array xI, Array xJ) {
diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp
index 1a01febd0..1222fe867 100644
--- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp
+++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp
@@ -33,54 +33,41 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
     uint32_t ITERATIONS = 100;
     float learningRate = 0.1;
 
-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor(xIData) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor(xJData) };
-
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor(yData) };
-
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor(zerosData) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor(zerosData) };
-
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor(zerosData) };
-
-    std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
-                                                        wIn, wOutI, wOutJ,
-                                                        bIn, bOut,  lOut };
-
     {
         kp::Manager mgr;
 
+        std::shared_ptr<kp::Tensor> xI = mgr.tensor(xIData);
+        std::shared_ptr<kp::Tensor> xJ = mgr.tensor(xJData);
+
+        std::shared_ptr<kp::Tensor> y = mgr.tensor(yData);
+
+        std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
+        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor(zerosData);
+        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor(zerosData);
+
+        std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
+        std::shared_ptr<kp::Tensor> bOut = mgr.tensor(zerosData);
+
+        std::shared_ptr<kp::Tensor> lOut = mgr.tensor(zerosData);
+
+        std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
+                                                            wIn, wOutI, wOutJ,
+                                                            bIn, bOut,  lOut };
+
         {
-            mgr.rebuild(params);
+            std::vector<uint32_t> spirv(
+                        (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
+                        (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+                            + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
-            std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+            std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
 
-            // Record op algo base
-            sq->begin();
+            mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
 
-            sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
-
-#ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
-            // Newer versions of Android are able to use shaderc to read raw string
-            sq->record<kp::OpAlgoCreate>(
-                    params, std::vector<char>(LR_SHADER.begin(), LR_SHADER.end()));
-#else
-            // Older versions of Android require the SPIRV binary directly
-            sq->record<kp::OpAlgoCreate>(
-                    params, std::vector<char>(
-                            kp::shader_data::shaders_glsl_logisticregression_comp_spv,
-                            kp::shader_data::shaders_glsl_logisticregression_comp_spv
-                                + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len
-                    ));
-#endif
-
-            sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
-
-            sq->end();
+            std::shared_ptr<kp::Sequence> sq = mgr.sequence()
+                ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+                ->record<kp::OpAlgoDispatch>(algo)
+                ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
 
             // Iterate across all expected iterations
             for (size_t i = 0; i < ITERATIONS; i++) {
@@ -94,15 +81,15 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
                 }
             }
         }
+
+        KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
+        KP_LOG_INFO("{}", wIn->data()[0]);
+        KP_LOG_INFO("{}", wIn->data()[1]);
+        KP_LOG_INFO("{}", bIn->data()[0]);
+
+        this->mWeights = wIn;
+        this->mBias = bIn;
     }
-
-    KP_LOG_INFO("RESULT: <<<<<<<<<<<<<<<<<<<");
-    KP_LOG_INFO(wIn->data()[0]);
-    KP_LOG_INFO(wIn->data()[1]);
-    KP_LOG_INFO(bIn->data()[0]);
-
-    this->mWeights = kp::Tensor(wIn->data());
-    this->mBias = kp::Tensor(bIn->data());
 }
 
 Array KomputeModelML::predict(Array xI, Array xJ) {
@@ -116,9 +103,9 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
     for (size_t i = 0; i < xI.size(); i++) {
         float xIVal = xI[i];
         float xJVal = xJ[i];
-        float result = (xIVal * this->mWeights.data()[0]
-                + xJVal * this->mWeights.data()[1]
-                + this->mBias.data()[0]);
+        float result = (xIVal * this->mWeights->data()[0]
+                + xJVal * this->mWeights->data()[1]
+                + this->mBias->data()[0]);
 
         // Instead of using sigmoid we'll just return full numbers
         Variant var = result > 0 ? 1 : 0;
@@ -131,15 +118,15 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
 Array KomputeModelML::get_params() {
     Array retArray;
 
-    KP_LOG_INFO(this->mWeights.size() + this->mBias.size());
+    KP_LOG_INFO("{}", (this->mWeights && this->mBias) ? this->mWeights->size() + this->mBias->size() : 0); // members may still be null if train() has not run
 
-    if(this->mWeights.size() + this->mBias.size() == 0) {
+    if(!this->mWeights || !this->mBias || this->mWeights->size() + this->mBias->size() == 0) {
         return retArray;
     }
 
-    retArray.push_back(this->mWeights.data()[0]);
-    retArray.push_back(this->mWeights.data()[1]);
-    retArray.push_back(this->mBias.data()[0]);
+    retArray.push_back(this->mWeights->data()[0]);
+    retArray.push_back(this->mWeights->data()[1]);
+    retArray.push_back(this->mBias->data()[0]);
     retArray.push_back(99.0);
 
     return retArray;
diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp
index 1f87fbb69..69bab4f19 100644
--- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp
+++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.hpp
@@ -28,8 +28,8 @@ public:
     static void _register_methods();
 
 private:
-    kp::Tensor mWeights;
-    kp::Tensor mBias;
+    std::shared_ptr<kp::Tensor> mWeights;
+    std::shared_ptr<kp::Tensor> mBias;
 };
 
 static std::string LR_SHADER = R"(
diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp
index 769699ca7..c435575e2 100755
--- a/examples/logistic_regression/src/Main.cpp
+++ b/examples/logistic_regression/src/Main.cpp
@@ -15,44 +15,39 @@ int main()
     uint32_t ITERATIONS = 100;
     float learningRate = 0.1;
 
-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
+    kp::Manager mgr;
 
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
+    std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+    std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
+    std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
 
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
+    std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
+    std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+    std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
 
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
+    std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
+    std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+
+    std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
 
     std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
                                                         wIn, wOutI, wOutJ,
                                                         bIn, bOut,  lOut };
 
-    kp::Manager mgr;
-
-    mgr.rebuild(params);
-
-    std::shared_ptr<kp::Sequence> sq = mgr.sequence();
-
-    // Record op algo base
-    sq->begin();
-
-    sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
-
-    sq->record<kp::OpAlgoCreate>(
-        params, std::vector<uint32_t>(
+    std::vector<uint32_t> spirv(
                 (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
                 (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
-                    + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)));
+                    + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
 
-    sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
+    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
 
-    sq->end();
+    mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
+
+    std::shared_ptr<kp::Sequence> sq = mgr.sequence()
+        ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+        ->record<kp::OpAlgoDispatch>(algo)
+        ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
 
     // Iterate across all expected iterations
     for (size_t i = 0; i < ITERATIONS; i++) {
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 0bf66d593..52d574ad3 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -647,12 +647,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #define KP_LOG_DEBUG(...)
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
-#define KP_LOG_DEBUG(...)                                             \
-    ((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+#define KP_LOG_DEBUG(...)                                                      \
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
 #else
-#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
+#define KP_LOG_DEBUG(...)                                                      \
+    fmt::print("[{} {}] [debug] [{}:{}] {}\n",                                 \
+               __DATE__,                                                       \
+               __TIME__,                                                       \
+               __FILE__,                                                       \
+               __LINE__,                                                       \
+               fmt::format(__VA_ARGS__))
 #endif // VK_USE_PLATFORM_ANDROID_KHR
 #endif // SPDLOG_ACTIVE_LEVEL > 1
 
@@ -660,12 +667,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #define KP_LOG_INFO(...)
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
-#define KP_LOG_INFO(...)                                              \
-    ((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+#define KP_LOG_INFO(...)                                                       \
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
 #else
-#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
+#define KP_LOG_INFO(...)                                                       \
+    fmt::print("[{} {}] [info] [{}:{}] {}\n",                                  \
+               __DATE__,                                                       \
+               __TIME__,                                                       \
+               __FILE__,                                                       \
+               __LINE__,                                                       \
+               fmt::format(__VA_ARGS__))
 #endif // VK_USE_PLATFORM_ANDROID_KHR
 #endif // SPDLOG_ACTIVE_LEVEL > 2
 
@@ -673,12 +687,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #define KP_LOG_WARN(...)
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
-#define KP_LOG_WARN(...)                                              \
-    ((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+#define KP_LOG_WARN(...)                                                       \
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
 #else
-#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
+#define KP_LOG_WARN(...)                                                       \
+    fmt::print("[{} {}] [warn] [{}:{}] {}\n",                                  \
+               __DATE__,                                                       \
+               __TIME__,                                                       \
+               __FILE__,                                                       \
+               __LINE__,                                                       \
+               fmt::format(__VA_ARGS__))
 #endif // VK_USE_PLATFORM_ANDROID_KHR
 #endif // SPDLOG_ACTIVE_LEVEL > 3
 
@@ -686,12 +707,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #define KP_LOG_ERROR(...)
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
-#define KP_LOG_ERROR(...)                                             \
-    ((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+#define KP_LOG_ERROR(...)                                                      \
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
 #else
-#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
+#define KP_LOG_ERROR(...)                                                      \
+    fmt::print("[{} {}] [error] [{}:{}] {}\n",                                 \
+               __DATE__,                                                       \
+               __TIME__,                                                       \
+               __FILE__,                                                       \
+               __LINE__,                                                       \
+               fmt::format(__VA_ARGS__))
 #endif // VK_USE_PLATFORM_ANDROID_KHR
 #endif // SPDLOG_ACTIVE_LEVEL > 4
 #endif // KOMPUTE_SPDLOG_ENABLED
@@ -701,9 +729,9 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #include <iostream>
 #include <vector>
 
+#include <SPIRV/GlslangToSpv.h>
 #include <glslang/Include/ResourceLimits.h>
 #include <glslang/Public/ShaderLang.h>
-#include <SPIRV/GlslangToSpv.h>
 
 namespace kp {
 
@@ -711,157 +739,161 @@ namespace kp {
 // Has been adobted by:
 // https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
 const TBuiltInResource defaultResource = {
-/* .MaxLights = */ 0,
-/* .MaxClipPlanes = */ 0,
-/* .MaxTextureUnits = */ 0,
-/* .MaxTextureCoords = */ 0,
-/* .MaxVertexAttribs = */ 64,
-/* .MaxVertexUniformComponents = */ 4096,
-/* .MaxVaryingFloats = */ 64,
-/* .MaxVertexTextureImageUnits = */ 0,
-/* .MaxCombinedTextureImageUnits = */ 0,
-/* .MaxTextureImageUnits = */ 0,
-/* .MaxFragmentUniformComponents = */ 0,
-/* .MaxDrawBuffers = */ 0,
-/* .MaxVertexUniformVectors = */ 128,
-/* .MaxVaryingVectors = */ 8,
-/* .MaxFragmentUniformVectors = */ 0,
-/* .MaxVertexOutputVectors = */ 16,
-/* .MaxFragmentInputVectors = */ 0,
-/* .MinProgramTexelOffset = */ -8,
-/* .MaxProgramTexelOffset = */ 7,
-/* .MaxClipDistances = */ 8,
-/* .MaxComputeWorkGroupCountX = */ 65535,
-/* .MaxComputeWorkGroupCountY = */ 65535,
-/* .MaxComputeWorkGroupCountZ = */ 65535,
-/* .MaxComputeWorkGroupSizeX = */ 1024,
-/* .MaxComputeWorkGroupSizeY = */ 1024,
-/* .MaxComputeWorkGroupSizeZ = */ 64,
-/* .MaxComputeUniformComponents = */ 1024,
-/* .MaxComputeTextureImageUnits = */ 16,
-/* .MaxComputeImageUniforms = */ 8,
-/* .MaxComputeAtomicCounters = */ 8,
-/* .MaxComputeAtomicCounterBuffers = */ 1,
-/* .MaxVaryingComponents = */ 60,
-/* .MaxVertexOutputComponents = */ 64,
-/* .MaxGeometryInputComponents = */ 64,
-/* .MaxGeometryOutputComponents = */ 128,
-/* .MaxFragmentInputComponents = */ 0,
-/* .MaxImageUnits = */ 0,
-/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
-/* .MaxCombinedShaderOutputResources = */ 8,
-/* .MaxImageSamples = */ 0,
-/* .MaxVertexImageUniforms = */ 0,
-/* .MaxTessControlImageUniforms = */ 0,
-/* .MaxTessEvaluationImageUniforms = */ 0,
-/* .MaxGeometryImageUniforms = */ 0,
-/* .MaxFragmentImageUniforms = */ 0,
-/* .MaxCombinedImageUniforms = */ 0,
-/* .MaxGeometryTextureImageUnits = */ 0,
-/* .MaxGeometryOutputVertices = */ 256,
-/* .MaxGeometryTotalOutputComponents = */ 1024,
-/* .MaxGeometryUniformComponents = */ 1024,
-/* .MaxGeometryVaryingComponents = */ 64,
-/* .MaxTessControlInputComponents = */ 128,
-/* .MaxTessControlOutputComponents = */ 128,
-/* .MaxTessControlTextureImageUnits = */ 0,
-/* .MaxTessControlUniformComponents = */ 1024,
-/* .MaxTessControlTotalOutputComponents = */ 4096,
-/* .MaxTessEvaluationInputComponents = */ 128,
-/* .MaxTessEvaluationOutputComponents = */ 128,
-/* .MaxTessEvaluationTextureImageUnits = */ 16,
-/* .MaxTessEvaluationUniformComponents = */ 1024,
-/* .MaxTessPatchComponents = */ 120,
-/* .MaxPatchVertices = */ 32,
-/* .MaxTessGenLevel = */ 64,
-/* .MaxViewports = */ 16,
-/* .MaxVertexAtomicCounters = */ 0,
-/* .MaxTessControlAtomicCounters = */ 0,
-/* .MaxTessEvaluationAtomicCounters = */ 0,
-/* .MaxGeometryAtomicCounters = */ 0,
-/* .MaxFragmentAtomicCounters = */ 0,
-/* .MaxCombinedAtomicCounters = */ 8,
-/* .MaxAtomicCounterBindings = */ 1,
-/* .MaxVertexAtomicCounterBuffers = */ 0,
-/* .MaxTessControlAtomicCounterBuffers = */ 0,
-/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
-/* .MaxGeometryAtomicCounterBuffers = */ 0,
-/* .MaxFragmentAtomicCounterBuffers = */ 0,
-/* .MaxCombinedAtomicCounterBuffers = */ 1,
-/* .MaxAtomicCounterBufferSize = */ 16384,
-/* .MaxTransformFeedbackBuffers = */ 4,
-/* .MaxTransformFeedbackInterleavedComponents = */ 64,
-/* .MaxCullDistances = */ 8,
-/* .MaxCombinedClipAndCullDistances = */ 8,
-/* .MaxSamples = */ 4,
-/* .maxMeshOutputVerticesNV = */ 256,
-/* .maxMeshOutputPrimitivesNV = */ 512,
-/* .maxMeshWorkGroupSizeX_NV = */ 32,
-/* .maxMeshWorkGroupSizeY_NV = */ 1,
-/* .maxMeshWorkGroupSizeZ_NV = */ 1,
-/* .maxTaskWorkGroupSizeX_NV = */ 32,
-/* .maxTaskWorkGroupSizeY_NV = */ 1,
-/* .maxTaskWorkGroupSizeZ_NV = */ 1,
-/* .maxMeshViewCountNV = */ 4,
-/* .maxDualSourceDrawBuffersEXT = */ 1,
+    /* .MaxLights = */ 0,
+    /* .MaxClipPlanes = */ 0,
+    /* .MaxTextureUnits = */ 0,
+    /* .MaxTextureCoords = */ 0,
+    /* .MaxVertexAttribs = */ 64,
+    /* .MaxVertexUniformComponents = */ 4096,
+    /* .MaxVaryingFloats = */ 64,
+    /* .MaxVertexTextureImageUnits = */ 0,
+    /* .MaxCombinedTextureImageUnits = */ 0,
+    /* .MaxTextureImageUnits = */ 0,
+    /* .MaxFragmentUniformComponents = */ 0,
+    /* .MaxDrawBuffers = */ 0,
+    /* .MaxVertexUniformVectors = */ 128,
+    /* .MaxVaryingVectors = */ 8,
+    /* .MaxFragmentUniformVectors = */ 0,
+    /* .MaxVertexOutputVectors = */ 16,
+    /* .MaxFragmentInputVectors = */ 0,
+    /* .MinProgramTexelOffset = */ -8,
+    /* .MaxProgramTexelOffset = */ 7,
+    /* .MaxClipDistances = */ 8,
+    /* .MaxComputeWorkGroupCountX = */ 65535,
+    /* .MaxComputeWorkGroupCountY = */ 65535,
+    /* .MaxComputeWorkGroupCountZ = */ 65535,
+    /* .MaxComputeWorkGroupSizeX = */ 1024,
+    /* .MaxComputeWorkGroupSizeY = */ 1024,
+    /* .MaxComputeWorkGroupSizeZ = */ 64,
+    /* .MaxComputeUniformComponents = */ 1024,
+    /* .MaxComputeTextureImageUnits = */ 16,
+    /* .MaxComputeImageUniforms = */ 8,
+    /* .MaxComputeAtomicCounters = */ 8,
+    /* .MaxComputeAtomicCounterBuffers = */ 1,
+    /* .MaxVaryingComponents = */ 60,
+    /* .MaxVertexOutputComponents = */ 64,
+    /* .MaxGeometryInputComponents = */ 64,
+    /* .MaxGeometryOutputComponents = */ 128,
+    /* .MaxFragmentInputComponents = */ 0,
+    /* .MaxImageUnits = */ 0,
+    /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
+    /* .MaxCombinedShaderOutputResources = */ 8,
+    /* .MaxImageSamples = */ 0,
+    /* .MaxVertexImageUniforms = */ 0,
+    /* .MaxTessControlImageUniforms = */ 0,
+    /* .MaxTessEvaluationImageUniforms = */ 0,
+    /* .MaxGeometryImageUniforms = */ 0,
+    /* .MaxFragmentImageUniforms = */ 0,
+    /* .MaxCombinedImageUniforms = */ 0,
+    /* .MaxGeometryTextureImageUnits = */ 0,
+    /* .MaxGeometryOutputVertices = */ 256,
+    /* .MaxGeometryTotalOutputComponents = */ 1024,
+    /* .MaxGeometryUniformComponents = */ 1024,
+    /* .MaxGeometryVaryingComponents = */ 64,
+    /* .MaxTessControlInputComponents = */ 128,
+    /* .MaxTessControlOutputComponents = */ 128,
+    /* .MaxTessControlTextureImageUnits = */ 0,
+    /* .MaxTessControlUniformComponents = */ 1024,
+    /* .MaxTessControlTotalOutputComponents = */ 4096,
+    /* .MaxTessEvaluationInputComponents = */ 128,
+    /* .MaxTessEvaluationOutputComponents = */ 128,
+    /* .MaxTessEvaluationTextureImageUnits = */ 16,
+    /* .MaxTessEvaluationUniformComponents = */ 1024,
+    /* .MaxTessPatchComponents = */ 120,
+    /* .MaxPatchVertices = */ 32,
+    /* .MaxTessGenLevel = */ 64,
+    /* .MaxViewports = */ 16,
+    /* .MaxVertexAtomicCounters = */ 0,
+    /* .MaxTessControlAtomicCounters = */ 0,
+    /* .MaxTessEvaluationAtomicCounters = */ 0,
+    /* .MaxGeometryAtomicCounters = */ 0,
+    /* .MaxFragmentAtomicCounters = */ 0,
+    /* .MaxCombinedAtomicCounters = */ 8,
+    /* .MaxAtomicCounterBindings = */ 1,
+    /* .MaxVertexAtomicCounterBuffers = */ 0,
+    /* .MaxTessControlAtomicCounterBuffers = */ 0,
+    /* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
+    /* .MaxGeometryAtomicCounterBuffers = */ 0,
+    /* .MaxFragmentAtomicCounterBuffers = */ 0,
+    /* .MaxCombinedAtomicCounterBuffers = */ 1,
+    /* .MaxAtomicCounterBufferSize = */ 16384,
+    /* .MaxTransformFeedbackBuffers = */ 4,
+    /* .MaxTransformFeedbackInterleavedComponents = */ 64,
+    /* .MaxCullDistances = */ 8,
+    /* .MaxCombinedClipAndCullDistances = */ 8,
+    /* .MaxSamples = */ 4,
+    /* .maxMeshOutputVerticesNV = */ 256,
+    /* .maxMeshOutputPrimitivesNV = */ 512,
+    /* .maxMeshWorkGroupSizeX_NV = */ 32,
+    /* .maxMeshWorkGroupSizeY_NV = */ 1,
+    /* .maxMeshWorkGroupSizeZ_NV = */ 1,
+    /* .maxTaskWorkGroupSizeX_NV = */ 32,
+    /* .maxTaskWorkGroupSizeY_NV = */ 1,
+    /* .maxTaskWorkGroupSizeZ_NV = */ 1,
+    /* .maxMeshViewCountNV = */ 4,
+    /* .maxDualSourceDrawBuffersEXT = */ 1,
+
+    /* .limits = */
+    {
+      /* .nonInductiveForLoops = */ 1,
+      /* .whileLoops = */ 1,
+      /* .doWhileLoops = */ 1,
+      /* .generalUniformIndexing = */ 1,
+      /* .generalAttributeMatrixVectorIndexing = */ 1,
+      /* .generalVaryingIndexing = */ 1,
+      /* .generalSamplerIndexing = */ 1,
+      /* .generalVariableIndexing = */ 1,
+      /* .generalConstantMatrixVectorIndexing = */ 1,
+    }
+};
 
-/* .limits = */ {
-    /* .nonInductiveForLoops = */ 1,
-    /* .whileLoops = */ 1,
-    /* .doWhileLoops = */ 1,
-    /* .generalUniformIndexing = */ 1,
-    /* .generalAttributeMatrixVectorIndexing = */ 1,
-    /* .generalVaryingIndexing = */ 1,
-    /* .generalSamplerIndexing = */ 1,
-    /* .generalVariableIndexing = */ 1,
-    /* .generalConstantMatrixVectorIndexing = */ 1,
-}};
-    
 /**
     Shader utily class with functions to compile and process glsl files.
 */
-class Shader {
-public:
+class Shader
+{
+  public:
     /**
      * Compile multiple sources with optional filenames. Currently this function
      * uses the glslang C++ interface which is not thread safe so this funciton
      * should not be called from multiple threads concurrently. If you have a
-     * online shader processing multithreading use-case that can't use offline 
+     * online shader processing multithreading use-case that can't use offline
      * compilation please open an issue.
      *
      * @param sources A list of raw glsl shaders in string format
      * @param files A list of file names respective to each of the sources
      * @param entryPoint The function name to use as entry point
      * @param definitions List of pairs containing key value definitions
-     * @param resourcesLimit A list that contains the resource limits for the GLSL compiler
+     * @param resources The resource limits for the GLSL compiler (defaults to
+     * defaultResource)
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
     static std::vector<uint32_t> compile_sources(
-            const std::vector<std::string>& sources,
-            const std::vector<std::string>& files = {},
-            const std::string& entryPoint = "main",
-            std::vector<std::pair<std::string,std::string>> definitions = {},
-            const TBuiltInResource& resources = defaultResource);
+      const std::vector<std::string>& sources,
+      const std::vector<std::string>& files = {},
+      const std::string& entryPoint = "main",
+      std::vector<std::pair<std::string, std::string>> definitions = {},
+      const TBuiltInResource& resources = defaultResource);
 
     /**
-     * Compile a single glslang source from string value. Currently this function
-     * uses the glslang C++ interface which is not thread safe so this funciton
-     * should not be called from multiple threads concurrently. If you have a
-     * online shader processing multithreading use-case that can't use offline 
-     * compilation please open an issue.
+     * Compile a single glslang source from string value. Currently this
+     * function uses the glslang C++ interface which is not thread safe so this
+     * function should not be called from multiple threads concurrently. If you
+     * have an online shader processing multithreading use-case that can't use
+     * offline compilation please open an issue.
      *
      * @param source An individual raw glsl shader in string format
      * @param entryPoint The function name to use as entry point
      * @param definitions List of pairs containing key value definitions
-     * @param resourcesLimit A list that contains the resource limits for the GLSL compiler
+     * @param resources The resource limits for the GLSL compiler (defaults to
+     * defaultResource)
      * @return The compiled SPIR-V binary in unsigned int32 format
      */
     static std::vector<uint32_t> compile_source(
-            const std::string& source,
-            const std::string& entryPoint = "main",
-            std::vector<std::pair<std::string,std::string>> definitions = {},
-            const TBuiltInResource& resources = defaultResource);
-
+      const std::string& source,
+      const std::string& entryPoint = "main",
+      std::vector<std::pair<std::string, std::string>> definitions = {},
+      const TBuiltInResource& resources = defaultResource);
 };
 
 }
@@ -919,7 +951,7 @@ class Tensor
      * otherwise there is no need to copy from host memory.
      */
     void rebuild(const std::vector<float>& data,
-           TensorTypes tensorType = TensorTypes::eDevice);
+                 TensorTypes tensorType = TensorTypes::eDevice);
 
     /**
      * Destroys and frees the GPU resources which include the buffer and memory.
@@ -990,9 +1022,8 @@ class Tensor
      * @param createBarrier Whether to create a barrier that ensures the data is
      * copied before further operations. Default is true.
      */
-    void recordCopyFromStagingToDevice(
-      const vk::CommandBuffer& commandBuffer,
-      bool createBarrier);
+    void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
+                                       bool createBarrier);
 
     /**
      * Records a copy from the internal device memory to the staging memory
@@ -1003,9 +1034,8 @@ class Tensor
      * @param createBarrier Whether to create a barrier that ensures the data is
      * copied before further operations. Default is true.
      */
-    void recordCopyFromDeviceToStaging(
-      const vk::CommandBuffer& commandBuffer,
-      bool createBarrier);
+    void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
+                                       bool createBarrier);
 
     /**
      * Records the buffer memory barrier into the command buffer which
@@ -1017,12 +1047,11 @@ class Tensor
      * @param scrStageMask Pipeline stage flags for source stage mask
      * @param dstStageMask Pipeline stage flags for destination stage mask
      */
-    void recordBufferMemoryBarrier(
-      const vk::CommandBuffer& commandBuffer,
-      vk::AccessFlagBits srcAccessMask,
-      vk::AccessFlagBits dstAccessMask,
-      vk::PipelineStageFlagBits srcStageMask,
-      vk::PipelineStageFlagBits dstStageMask);
+    void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                   vk::AccessFlagBits srcAccessMask,
+                                   vk::AccessFlagBits dstAccessMask,
+                                   vk::PipelineStageFlagBits srcStageMask,
+                                   vk::PipelineStageFlagBits dstStageMask);
 
     /**
      * Constructs a vulkan descriptor buffer info which can be used to specify
@@ -1070,11 +1099,11 @@ class Tensor
                             std::shared_ptr<vk::DeviceMemory> memory,
                             vk::MemoryPropertyFlags memoryPropertyFlags);
     void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
-                    std::shared_ptr<vk::Buffer> bufferFrom,
-                    std::shared_ptr<vk::Buffer> bufferTo,
-                    vk::DeviceSize bufferSize,
-                    vk::BufferCopy copyRegion,
-                    bool createBarrier);
+                          std::shared_ptr<vk::Buffer> bufferFrom,
+                          std::shared_ptr<vk::Buffer> bufferTo,
+                          vk::DeviceSize bufferSize,
+                          vk::BufferCopy copyRegion,
+                          bool createBarrier);
 
     // Private util functions
     vk::BufferUsageFlags getPrimaryBufferUsageFlags();
@@ -1094,8 +1123,7 @@ namespace kp {
 */
 class Algorithm
 {
-public:
-
+  public:
     /**
      *  Default constructor for Algorithm
      *
@@ -1103,12 +1131,11 @@ public:
      *  @param commandBuffer The vulkan command buffer to bind the pipeline and
      * shaders
      */
-    Algorithm(
-            std::shared_ptr<vk::Device> device,
-            const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-            const std::vector<uint32_t>& spirv = {},
-            const Workgroup& workgroup = {},
-            const Constants& specializationConstants = {});
+    Algorithm(std::shared_ptr<vk::Device> device,
+              const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+              const std::vector<uint32_t>& spirv = {},
+              const Workgroup& workgroup = {},
+              const Constants& specializationConstants = {});
 
     /**
      * Initialiser for the shader data provided to the algorithm as well as
@@ -1116,14 +1143,13 @@ public:
      *
      * @param shaderFileData The bytes in spir-v format of the shader
      * @tensorParams The Tensors to be used in the Algorithm / shader for
-     * @specalizationInstalces The specialization parameters to pass to the function
-     * processing
+     * @param specializationConstants The specialization parameters to pass to
+     * the function processing
      */
-    void rebuild(
-            const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-            const std::vector<uint32_t>& spirv = {},
-            const Workgroup& workgroup = {},
-            const Constants& specializationConstants = {});
+    void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+                 const std::vector<uint32_t>& spirv = {},
+                 const Workgroup& workgroup = {},
+                 const Constants& specializationConstants = {});
 
     /**
      * Destructor for Algorithm which is responsible for freeing and desroying
@@ -1143,7 +1169,8 @@ public:
 
     void bindCore(const vk::CommandBuffer& commandBuffer);
 
-    void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants);
+    void bindPush(const vk::CommandBuffer& commandBuffer,
+                  const Constants& pushConstants);
 
     bool isInit();
 
@@ -1155,7 +1182,7 @@ public:
 
     void destroy();
 
-private:
+  private:
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::Device> mDevice;
     std::vector<std::shared_ptr<Tensor>> mTensors;
@@ -1489,7 +1516,7 @@ namespace kp {
 /**
  *  Container of operations that can be sent to GPU as batch
  */
-class Sequence: public std::enable_shared_from_this<Sequence>
+class Sequence : public std::enable_shared_from_this<Sequence>
 {
   public:
     /**
@@ -1526,8 +1553,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
      * which allows for extensible configurations on initialisation.
      */
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    std::shared_ptr<Sequence> record(
+      std::vector<std::shared_ptr<Tensor>> tensors,
+      TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1536,14 +1564,13 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(tensors, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
 
         return this->record(op);
     }
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
+                                     TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1552,8 +1579,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(algorithm, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
 
         return this->record(op);
     }
@@ -1576,8 +1603,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
      */
     // TODO: Aim to have only a single function with tensors/algorithm
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
+                                   TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1586,16 +1613,16 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(tensors, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
 
-        // TODO: Aim to be able to handle errors when returning without throw except
+        // TODO: Aim to be able to handle errors when returning without throw
+        // except
         return this->eval(op);
     }
     // Needded as otherise can't use initialiser list
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
+                                   TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1604,8 +1631,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(algorithm, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
 
         return this->eval(op);
     }
@@ -1627,8 +1654,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
      * @return shared_ptr<Sequence> of the Sequence class itself
      */
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    std::shared_ptr<Sequence> evalAsync(
+      std::vector<std::shared_ptr<Tensor>> tensors,
+      TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1637,15 +1665,14 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(tensors, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
 
         return this->evalAsync(op);
     }
     // Needed as otherwise it's not possible to use initializer lists
     template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence>
-    evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
+                                        TArgs&&... params)
     {
         KP_LOG_DEBUG("Kompute Sequence record function started");
 
@@ -1654,8 +1681,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
                       "OpBase derived classes");
 
         KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
-        std::shared_ptr<T> op{
-            new T(algorithm, std::forward<TArgs>(params)...) };
+        std::shared_ptr<T> op{ new T(algorithm,
+                                     std::forward<TArgs>(params)...) };
 
         return this->evalAsync(op);
     }
@@ -1670,7 +1697,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
 
     /**
-     * Clear function clears all operations currently recorded and starts recording again.
+     * Clear function clears all operations currently recorded and starts
+     * recording again.
      */
     void clear();
 
@@ -1821,10 +1849,10 @@ class Manager
       Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
 
     std::shared_ptr<Algorithm> algorithm(
-            const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-            const std::vector<uint32_t>& spirv = {},
-            const Workgroup& workgroup = {},
-            const Constants& specializationConstants = {});
+      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
+      const std::vector<uint32_t>& spirv = {},
+      const Workgroup& workgroup = {},
+      const Constants& specializationConstants = {});
 
     void destroy();
     void clear();
@@ -1856,7 +1884,8 @@ class Manager
 
     // Create functions
     void createInstance();
-    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0);
+    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
+                      uint32_t physicalDeviceIndex = 0);
 };
 
 } // End namespace kp
diff --git a/src/include/kompute/Core.hpp b/src/include/kompute/Core.hpp
index b50bf081d..3510a2021 100644
--- a/src/include/kompute/Core.hpp
+++ b/src/include/kompute/Core.hpp
@@ -61,8 +61,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
 #define KP_LOG_DEBUG(...)                                                      \
-    ((void)__android_log_print(                                                \
-      ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
 #else
@@ -81,8 +81,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
 #define KP_LOG_INFO(...)                                                       \
-    ((void)__android_log_print(                                                \
-      ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
 #else
@@ -101,8 +101,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
 #define KP_LOG_WARN(...)                                                       \
-    ((void)__android_log_print(                                                \
-      ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
 #else
@@ -121,8 +121,8 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
 #else
 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
 #define KP_LOG_ERROR(...)                                                      \
-    ((void)__android_log_print(                                                \
-      ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
+    ((void)__android_log_write(                                                \
+      ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
 #elif defined(KOMPUTE_BUILD_PYTHON)
 #define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
 #else
diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp
index ba1e892d5..34818fcf0 100644
--- a/src/include/kompute/operations/OpBase.hpp
+++ b/src/include/kompute/operations/OpBase.hpp
@@ -1,7 +1,6 @@
 #pragma once
 
 #include "kompute/Core.hpp"
-
 #include "kompute/Tensor.hpp"
 #include "kompute/Algorithm.hpp"