From 3304767f2cf0e1902fe190758e7a95c45d0bc6fd Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Fri, 26 Feb 2021 18:58:19 +0000
Subject: [PATCH] Updated to enable OpMult to work

---
 single_include/kompute/Kompute.hpp            |  4 +-
 src/Algorithm.cpp                             | 37 ++++++++-----------
 src/Manager.cpp                               |  4 +-
 src/OpAlgoDispatch.cpp                        |  6 ++-
 src/OpTensorSyncDevice.cpp                    |  2 +
 src/Sequence.cpp                              |  4 +-
 .../kompute/operations/OpAlgoDispatch.hpp     |  2 +-
 src/include/kompute/operations/OpMult.hpp     |  2 +-
 test/TestAsyncOperations.cpp                  |  9 +++--
 test/TestDestroy.cpp                          |  6 ++-
 test/TestLogisticRegression.cpp               | 12 ++++--
 11 files changed, 49 insertions(+), 39 deletions(-)
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index e60a5bdf0..79bc6e1b4 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -1686,7 +1686,7 @@ class OpAlgoDispatch : public OpBase
 {
   public:
 
-    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
+    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoInit = false);
 
     /**
      * Default destructor, which is in charge of destroying the algorithm
@@ -1745,7 +1745,7 @@ class OpMult : public OpAlgoDispatch
      * @param komputeWorkgroup Optional parameter to specify the layout for processing
      */
     OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
-        : OpAlgoDispatch(algorithm)
+        : OpAlgoDispatch(algorithm, true)
     {
         KP_LOG_DEBUG("Kompute OpMult constructor with params");
 
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index aee9ddd36..f00bc1090 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -15,7 +15,14 @@ Algorithm::Algorithm(
     KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
 
     this->mDevice = device;
-    this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
+
+    if (tensors.size() && spirv.size()) {
+        KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and spirv size: {}", tensors.size(), spirv.size());
+        this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
+    }
+    else {
+        KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or spirv so not rebuilding vulkan components");
+    }
 }
 
 Algorithm::~Algorithm()
@@ -39,10 +46,10 @@ Algorithm::rebuild(
     this->mSpirv = spirv;
     this->mSpecializationConstants = specializationConstants;
     this->mPushConstants = pushConstants;
-    this->setWorkgroup(workgroup);
+    this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
 
     // Descriptor pool is created first so if available then destroy all before rebuild
-    if (this->mFreeDescriptorPool) {
+    if (this->isInit()) {
         this->destroy();
     }
 
@@ -340,20 +347,6 @@ Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
 {
     KP_LOG_DEBUG("Kompute Algorithm calling record dispatch");
 
-    if(this->mPipelineCache) {
-        KP_LOG_WARN("Value valid");
-    }
-    else {
-        KP_LOG_WARN("NOT Value valid");
-    }
-
-    if(this->mPipeline) {
-        KP_LOG_WARN("Value valid");
-    }
-    else {
-        KP_LOG_WARN("NOT Value valid");
-    }
-
     commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
                                        *this->mPipeline);
 
@@ -376,10 +369,7 @@ Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
 void
 Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
 
-    KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size X: {}, Y: {}, Z: {}",
-                this->mWorkgroup[0],
-                this->mWorkgroup[1],
-                this->mWorkgroup[2]);
+    KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
 
     // The dispatch size is set up based on either explicitly provided template
     // parameters or by default it would take the shape and size of the tensors
@@ -394,6 +384,11 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
     } else {
         this->mWorkgroup = { minSize, 1, 1 };
     }
+
+    KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
+                this->mWorkgroup[0],
+                this->mWorkgroup[1],
+                this->mWorkgroup[2]);
 }
 
 const Workgroup&
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 833069d9f..9f25e1826 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -33,7 +33,7 @@ Manager::Manager()
 Manager::Manager(uint32_t physicalDeviceIndex,
                  const std::vector<uint32_t>& familyQueueIndices)
 {
-    this->mManageResources = false;
+    this->mManageResources = true;
 
     this->createInstance();
     this->createDevice(familyQueueIndices, physicalDeviceIndex);
@@ -43,7 +43,7 @@ Manager::Manager(std::shared_ptr<vk::Instance> instance,
                  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                  std::shared_ptr<vk::Device> device)
 {
-    this->mManageResources = true;
+    this->mManageResources = false;
 
     this->mInstance = instance;
     this->mPhysicalDevice = physicalDevice;
diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp
index a20900189..09050fba0 100644
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@@ -4,10 +4,14 @@
 
 namespace kp {
 
-OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm)
+OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoCheck)
 {
     KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
 
+    if (!skipAlgoCheck && !algorithm->isInit()) {
+        throw std::runtime_error("Kompute OpAlgoDispatch constructor with non initialised algorithm");
+    }
+
     this->mAlgorithm = algorithm;
 }
 
diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp
index 2cdd4e443..a73224297 100644
--- a/src/OpTensorSyncDevice.cpp
+++ b/src/OpTensorSyncDevice.cpp
@@ -19,6 +19,8 @@ OpTensorSyncDevice::OpTensorSyncDevice(
 OpTensorSyncDevice::~OpTensorSyncDevice()
 {
     KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
+
+    this->mTensors.clear();
 }
 
 void
diff --git a/src/Sequence.cpp b/src/Sequence.cpp
index e341e734c..9424e119d 100644
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@@ -78,8 +78,7 @@ Sequence::eval()
 std::shared_ptr<Sequence>
 Sequence::eval(std::shared_ptr<OpBase> op) {
     this->clear();
-    this->record(op);
-    this->eval();
+    return this->record(op)->eval();
 }
 
 std::shared_ptr<Sequence>
@@ -88,6 +87,7 @@ Sequence::evalAsync()
     if (this->isRecording()) {
         this->end();
     }
+
     if (this->mIsRunning) {
         throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was "
                     "called without successful wait");
diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp
index 1b5ab1bf0..e61c3166d 100644
--- a/src/include/kompute/operations/OpAlgoDispatch.hpp
+++ b/src/include/kompute/operations/OpAlgoDispatch.hpp
@@ -17,7 +17,7 @@ class OpAlgoDispatch : public OpBase
 {
   public:
 
-    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
+    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoInit = false);
 
     /**
      * Default destructor, which is in charge of destroying the algorithm
diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp
index 992b0e8a0..fea38bdee 100644
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@@ -33,7 +33,7 @@ class OpMult : public OpAlgoDispatch
      * @param komputeWorkgroup Optional parameter to specify the layout for processing
      */
     OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
-        : OpAlgoDispatch(algorithm)
+        : OpAlgoDispatch(algorithm, true)
     {
         KP_LOG_DEBUG("Kompute OpMult constructor with params");
 
diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp
index 6e5ba8adf..da9f8d887 100644
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@@ -80,8 +80,11 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
 
     std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;
 
+    std::vector<std::shared_ptr<kp::Algorithm>> algosAsync;
+
     for (uint32_t i = 0; i < numParallel; i++) {
         inputsAsyncB.push_back(mgr.tensor(data));
+        algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv));
     }
 
     std::vector<std::shared_ptr<kp::Sequence>> sqs;
@@ -93,7 +96,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
     auto startAsync = std::chrono::high_resolution_clock::now();
 
     for (uint32_t i = 0; i < numParallel; i++) {
-        sqs[i]->evalAsync<kp::OpAlgoDispatch>(algorithms[i]);
+        sqs[i]->evalAsync<kp::OpAlgoDispatch>(algosAsync[i]);
     }
 
     for (uint32_t i = 0; i < numParallel; i++) {
@@ -157,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
 
     sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
 
-    std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA});
-    std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB});
+    std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA}, spirv);
+    std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB}, spirv);
 
     sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
     sq2->evalAsync<kp::OpAlgoDispatch>(algo2);
diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp
index 43b7c8e1a..cf753267e 100644
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@@ -74,7 +74,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
             mgr.sequence()
                 ->record<kp::OpTensorSyncDevice>(algo->getTensors())
                 ->record<kp::OpAlgoDispatch>(algo)
-                ->record<kp::OpTensorSyncDevice>(algo->getTensors())
+                ->record<kp::OpTensorSyncLocal>(algo->getTensors())
                 ->eval();
 
             tensorA->destroy();
@@ -111,12 +111,14 @@ TEST(TestDestroy, TestDestroySequenceSingle)
 
             tensorA = mgr.tensor({0, 0, 0});
 
-            mgr.sequence()
+            sq = mgr.sequence()
                 ->record<kp::OpTensorSyncDevice>({tensorA})
                 ->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
                 ->record<kp::OpTensorSyncLocal>({tensorA})
                 ->eval();
 
+            sq->destroy();
+
             EXPECT_FALSE(sq->isInit());
         }
     }
diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp
index 16c08afb9..f7ad9eda1 100644
--- a/test/TestLogisticRegression.cpp
+++ b/test/TestLogisticRegression.cpp
@@ -32,13 +32,15 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
                                                         wIn, wOutI, wOutJ,
                                                         bIn, bOut,  lOut };
 
+        mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
+
         std::vector<uint32_t> spirv = std::vector<uint32_t>(
-            (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
-            (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
-              kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
+            (uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv,
+            (uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv +
+              kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len));
 
         std::shared_ptr<kp::Algorithm> algorithm =
-            mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
+            mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0}));
 
         std::shared_ptr<kp::Sequence> sq =
             mgr.sequence()
@@ -104,6 +106,8 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
                                                         wIn, wOutI, wOutJ,
                                                         bIn, bOut,  lOut };
 
+        mgr.sequence()->record<kp::OpTensorSyncDevice>(params)->eval();
+
         std::vector<uint32_t> spirv = std::vector<uint32_t>(
             (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
             (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +