Further tests added to new structure

2021-02-25 22:33:08 +00:00 · 2021-02-25 22:33:08 +00:00 · 6378583a23
commit 6378583a23
parent 3f1288271d
17 changed files with 636 additions and 514 deletions
--- a/single_include/AggregateHeaders.cpp
+++ b/single_include/AggregateHeaders.cpp
@ -10,5 +10,6 @@
 #include "kompute/operations/OpTensorCopy.hpp"
 #include "kompute/operations/OpTensorSyncDevice.hpp"
 #include "kompute/operations/OpTensorSyncLocal.hpp"
+#include "kompute/operations/OpAlgoDispatch.hpp"
 #include "kompute/Algorithm.hpp"
 #include "kompute/Tensor.hpp"
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -928,7 +928,9 @@ class Tensor
    /**
     * Destroys and frees the GPU resources which include the buffer and memory.
     */
-    void freeMemoryDestroyGPUResources();
+    void destroy();
+
+    bool isInit();

    /**
     * Returns the vector of data currently contained by the Tensor. It is
@ -1129,10 +1131,6 @@ public:
            const Constants& specializationConstants = {},
            const Constants& pushConstants = {});

-    bool isInit();
-
-    void freeMemoryDestroyGPUResources();
-
    /**
     * Destructor for Algorithm which is responsible for freeing and destroying
     * respective pipelines and owned parameter groups.
@ -1149,11 +1147,21 @@ public:
     */
    void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);

+    bool isInit();
+
    void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);

+    const Workgroup& getWorkgroup();
+    const Constants& getSpecializationConstants();
+    const Constants& getPushConstants();
+    const std::vector<std::shared_ptr<Tensor>>& getTensors();
+
+    void destroy();
+
 private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::Device> mDevice;
+    std::vector<std::shared_ptr<Tensor>> mTensors;

    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
@ -1184,7 +1192,7 @@ private:
    void createPipeline();

    // Parameters
-    void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
+    void createParameters();
 };

 } // End namespace kp
@ -1270,6 +1278,10 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    ~Sequence();

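+    /**
+     * Records an already constructed operation into this sequence.
+     *
+     * @param op Operation (derived from OpBase) to record
+     * @return shared_ptr<Sequence> which can be used to chain further calls
+     */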
+    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
+
    /**
     * Record function for operation to be added to the GPU queue in batch. This
     * template requires classes to be derived from the OpBase class. This
@ -1280,7 +1292,146 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     */
-    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        // Compile-time guard: only OpBase derived types can be recorded.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence record(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence record function started");
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+
+        // Build the operation from the tensors plus the forwarded arguments
+        // and hand it over to the non-template record(op) overload.
+        std::shared_ptr<OpBase> operation(
+          new T(tensors, std::forward<TArgs>(params)...));
+        return this->record(operation);
+    }
+    /**
+     * Record overload for operations that are constructed from an Algorithm
+     * instead of a list of tensors. T must derive from OpBase.
+     *
+     * @param algorithm Algorithm passed as first constructor argument of T
+     * @param params Remaining constructor arguments, forwarded to T
+     * @return Result of the non-template record(op) overload
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        // Compile-time guard: only OpBase derived types can be recorded.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence record(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence record function started");
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+
+        std::shared_ptr<OpBase> operation(
+          new T(algorithm, std::forward<TArgs>(params)...));
+        return this->record(operation);
+    }
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> eval();
+
+    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence eval function started");
+
+        // Compile-time guard: only OpBase derived types can be evaluated.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence eval(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(tensors, std::forward<TArgs>(params)...) };
+
+        // Resolves to the non-template eval(std::shared_ptr<OpBase>) overload.
+        return this->eval(op);
+    }
+    /**
+     * Eval overload for operations that are constructed from an Algorithm.
+     * Needed as otherwise it's not possible to use initializer lists.
+     *
+     * @param algorithm Algorithm passed as first constructor argument of T
+     * @param params Remaining constructor arguments, forwarded to T
+     * @return Result of the non-template eval(op) overload
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence eval function started");
+
+        // Compile-time guard: only OpBase derived types can be evaluated.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence eval(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(algorithm, std::forward<TArgs>(params)...) };
+
+        // Resolves to the non-template eval(std::shared_ptr<OpBase>) overload.
+        return this->eval(op);
+    }
+
+    /**
+     * Eval Async sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier. EvalAwait() must
+     * be called after to ensure the sequence is terminated correctly.
+     *
+     * @return shared_ptr<Sequence> which can be used to chain further calls
+     * (e.g. evalAwait()).
+     */
+    std::shared_ptr<Sequence> evalAsync();
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence evalAsync function started");
+
+        // Compile-time guard: only OpBase derived types can be evaluated.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence evalAsync(...) template only valid "
+                      "with OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(tensors, std::forward<TArgs>(params)...) };
+
+        // The class declares no evalAsync(std::shared_ptr<OpBase>) overload,
+        // so a call to evalAsync(op) cannot be resolved. Perform the same
+        // steps eval(op) uses (clear, record) and then submit asynchronously.
+        this->clear();
+        this->record(op);
+        return this->evalAsync();
+    }
+    // Needed as otherwise it's not possible to use initializer lists
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence evalAsync function started");
+
+        // Compile-time guard: only OpBase derived types can be evaluated.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence evalAsync(...) template only valid "
+                      "with OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(algorithm, std::forward<TArgs>(params)...) };
+
+        // The class declares no evalAsync(std::shared_ptr<OpBase>) overload,
+        // so a call to evalAsync(op) cannot be resolved. Perform the same
+        // steps eval(op) uses (clear, record) and then submit asynchronously.
+        this->clear();
+        this->record(op);
+        return this->evalAsync();
+    }
+
+    /**
+     * Eval Await waits for the fence to finish processing and then once it
+     * finishes, it runs the postEval of all operations.
+     *
+     * @param waitFor Number of milliseconds to wait before timing out.
+     * @return shared_ptr<Sequence> which can be used to chain further calls
+     */
+    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);

    /**
     * Clear function clears all operations currently recorded and starts recording again.
@ -1303,32 +1454,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    void end();

-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> eval();
-
-    /**
-     * Eval Async sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier. EvalAwait() must
-     * be called after to ensure the sequence is terminated correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync();
-
-    /**
-     * Eval Await waits for the fence to finish processing and then once it
-     * finishes, it runs the postEval of all operations.
-     *
-     * @param waitFor Number of milliseconds to wait before timing out.
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
-
    /**
     * Returns true if the sequence is currently in recording activated.
     *
@ -1336,6 +1461,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    bool isRecording();

+    bool isInit();
+
    /**
     * Returns true if the sequence is currently running - mostly used for async
     * workloads.
@ -1348,7 +1475,7 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     * Destroys and frees the GPU resources which include the buffer and memory
     * and sets the sequence as init=False.
     */
-    void freeMemoryDestroyGPUResources();
+    void destroy();

  private:
    // -------------- NEVER OWNED RESOURCES
@ -1444,6 +1571,8 @@ class Manager
     * they would like to create the resources on.
     *
     * @param physicalDeviceIndex The index of the physical device to use
+     * @param manageResources (Optional) Whether to manage the memory of the
+     * resources created and destroy when the manager is destroyed.
     * @param familyQueueIndices (Optional) List of queue indices to add for
     * explicit allocation
     * @param totalQueues The total number of compute queues to create.
@ -1462,8 +1591,7 @@ class Manager
     */
    Manager(std::shared_ptr<vk::Instance> instance,
            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-            std::shared_ptr<vk::Device> device,
-            uint32_t physicalDeviceIndex);
+            std::shared_ptr<vk::Device> device);

    /**
     * Manager destructor which would ensure all owned resources are destroyed
@ -1506,12 +1634,14 @@ class Manager
            const Constants& specializationConstants = {},
            const Constants& pushConstants = {});

+    void destroy();
+    void clear();
+
  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::Instance> mInstance = nullptr;
    bool mFreeInstance = false;
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    uint32_t mPhysicalDeviceIndex = -1;
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;

@ -1523,7 +1653,7 @@ class Manager
    std::vector<uint32_t> mComputeQueueFamilyIndices;
    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

-    uint32_t mCurrentSequenceIndex = -1;
+    bool mManageResources = false;

 #if DEBUG
 #ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
@ -1534,7 +1664,7 @@ class Manager

    // Create functions
    void createInstance();
-    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
+    // physicalDeviceIndex selects the entry of the instance's enumerated
+    // physical devices that the device is created on.
+    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0);
 };

 } // End namespace kp
@ -1553,8 +1683,7 @@ class OpAlgoDispatch : public OpBase
 {
  public:

-    OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
-           const std::shared_ptr<kp::Algorithm>& algorithm);
+    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -1586,7 +1715,6 @@ class OpAlgoDispatch : public OpBase

 private:
    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<std::shared_ptr<Tensor>> mTensors;
    std::shared_ptr<Algorithm> mAlgorithm;
 };

--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@ -22,7 +22,7 @@ Algorithm::~Algorithm()
 {
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");

-    this->freeMemoryDestroyGPUResources();
+    this->destroy();
 }

 void
@ -35,23 +35,35 @@ Algorithm::rebuild(
 {
    KP_LOG_DEBUG("Kompute Algorithm rebuild started");

-    this->setWorkgroup(workgroup);
+    this->mTensors = tensors;
    this->mSpirv = spirv;
    this->mSpecializationConstants = specializationConstants;
    this->mPushConstants = pushConstants;
+    this->setWorkgroup(workgroup);

    // Descriptor pool is created first so if available then destroy all before rebuild
    if (this->mFreeDescriptorPool) {
-        this->freeMemoryDestroyGPUResources();
+        this->destroy();
    }

-    this->createParameters(tensors);
+    this->createParameters();
    this->createShaderModule();
    this->createPipeline();
 }

+/**
+ * Reports whether every pipeline, descriptor and shader handle held by the
+ * algorithm is currently set. destroy() resets these handles to nullptr.
+ *
+ * @return true only if none of the handles is empty
+ */
+bool
+Algorithm::isInit() {
+    // Pipeline related handles.
+    if (!this->mPipeline || !this->mPipelineCache || !this->mPipelineLayout) {
+        return false;
+    }
+    // Descriptor related handles.
+    if (!this->mDescriptorPool || !this->mDescriptorSet ||
+        !this->mDescriptorSetLayout) {
+        return false;
+    }
+    return static_cast<bool>(this->mShaderModule);
+}
+
 void
-Algorithm::freeMemoryDestroyGPUResources() {
+Algorithm::destroy() {

    if (!this->mDevice) {
        KP_LOG_WARN(
@ -68,6 +80,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mPipeline,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mPipeline = nullptr;
    }

    if (this->mFreePipelineCache) {
@ -79,6 +92,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mPipelineCache,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mPipelineCache = nullptr;
    }

    if (this->mFreePipelineLayout) {
@ -90,6 +104,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mPipelineLayout,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mPipelineLayout = nullptr;
    }

    if (this->mFreeShaderModule) {
@ -101,6 +116,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mShaderModule,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mShaderModule = nullptr;
    }

    if (this->mFreeDescriptorSet) {
@ -111,6 +127,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        }
        this->mDevice->freeDescriptorSets(
          *this->mDescriptorPool, 1, this->mDescriptorSet.get());
+        this->mDescriptorSet = nullptr;
    }

    if (this->mFreeDescriptorSetLayout) {
@ -122,6 +139,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mDescriptorSetLayout,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mDescriptorSetLayout = nullptr;
    }

    if (this->mFreeDescriptorPool) {
@ -133,18 +151,19 @@ Algorithm::freeMemoryDestroyGPUResources() {
        this->mDevice->destroy(
          *this->mDescriptorPool,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mDescriptorPool = nullptr;
    }
 }

 void
-Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams)
+Algorithm::createParameters()
 {
    KP_LOG_DEBUG("Kompute Algorithm createParameters started");

    std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
        vk::DescriptorPoolSize(
          vk::DescriptorType::eStorageBuffer,
-          static_cast<uint32_t>(tensorParams.size()) // Descriptor count
+          static_cast<uint32_t>(this->mTensors.size()) // Descriptor count
          )
    };

@ -161,7 +180,7 @@ Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorPa
    this->mFreeDescriptorPool = true;

    std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
-    for (size_t i = 0; i < tensorParams.size(); i++) {
+    for (size_t i = 0; i < this->mTensors.size(); i++) {
        descriptorSetBindings.push_back(
          vk::DescriptorSetLayoutBinding(i, // Binding index
                                         vk::DescriptorType::eStorageBuffer,
@ -193,11 +212,11 @@ Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorPa
    this->mFreeDescriptorSet = true;

    KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
-    for (size_t i = 0; i < tensorParams.size(); i++) {
+    for (size_t i = 0; i < this->mTensors.size(); i++) {
        std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;

        vk::DescriptorBufferInfo descriptorBufferInfo =
-          tensorParams[i]->constructDescriptorBufferInfo();
+          this->mTensors[i]->constructDescriptorBufferInfo();

        computeWriteDescriptorSets.push_back(
          vk::WriteDescriptorSet(*this->mDescriptorSet,
@ -377,4 +396,24 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
    }
 }

+/** Returns a read-only reference to the stored workgroup (mWorkgroup). */
+const Workgroup&
+Algorithm::getWorkgroup() {
+    return this->mWorkgroup;
+}
+
+/** Returns a read-only reference to the stored specialization constants. */
+const Constants&
+Algorithm::getSpecializationConstants() {
+    return this->mSpecializationConstants;
+}
+
+/** Returns a read-only reference to the stored push constants. */
+const Constants&
+Algorithm::getPushConstants() {
+    return this->mPushConstants;
+}
+
+/** Returns a read-only reference to the tensors assigned in rebuild(). */
+const std::vector<std::shared_ptr<Tensor>>&
+Algorithm::getTensors() {
+    return this->mTensors;
+}
+
 }
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@ -33,26 +33,33 @@ Manager::Manager()
 Manager::Manager(uint32_t physicalDeviceIndex,
                 const std::vector<uint32_t>& familyQueueIndices)
 {
-    this->mPhysicalDeviceIndex = physicalDeviceIndex;
+    this->mManageResources = false;

    this->createInstance();
-    this->createDevice(familyQueueIndices);
+    this->createDevice(familyQueueIndices, physicalDeviceIndex);
 }

 Manager::Manager(std::shared_ptr<vk::Instance> instance,
                 std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-                 std::shared_ptr<vk::Device> device,
-                 uint32_t physicalDeviceIndex)
+                 std::shared_ptr<vk::Device> device)
 {
+    this->mManageResources = true;
+
    this->mInstance = instance;
    this->mPhysicalDevice = physicalDevice;
    this->mDevice = device;
-    this->mPhysicalDeviceIndex = physicalDeviceIndex;
 }

 Manager::~Manager()
 {
    KP_LOG_DEBUG("Kompute Manager Destructor started");
+    // The destructor only logs and delegates; the teardown itself lives in
+    // destroy(), which can also be called explicitly.
+    this->destroy();
+}
+
+void
+Manager::destroy() {
+
+    KP_LOG_DEBUG("Kompute Manager destroy() started");

    if (this->mDevice == nullptr) {
        KP_LOG_ERROR(
@ -60,32 +67,32 @@ Manager::~Manager()
        return;
    }

-    if (this->mManagedSequences.size()) {
+    if (this->mManageResources && this->mManagedSequences.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
                     "managed sequences");
        for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
            if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
-                sq->freeMemoryDestroyGPUResources();
+                sq->destroy();
            }
        }
        this->mManagedSequences.clear();
    }

-    if (this->mManagedAlgorithms.size()) {
+    if (this->mManageResources && this->mManagedAlgorithms.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
        for (const std::weak_ptr<Algorithm>& weakAlgorithm : this->mManagedAlgorithms) {
            if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
-                algorithm->freeMemoryDestroyGPUResources();
+                algorithm->destroy();
            }
        }
        this->mManagedAlgorithms.clear();
    }

-    if (this->mManagedTensors.size()) {
+    if (this->mManageResources && this->mManagedTensors.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
        for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
            if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
-                tensor->freeMemoryDestroyGPUResources();
+                tensor->destroy();
            }
        }
        this->mManagedTensors.clear();
@ -95,6 +102,7 @@ Manager::~Manager()
        KP_LOG_INFO("Destroying device");
        this->mDevice->destroy(
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mDevice = nullptr;
        KP_LOG_DEBUG("Kompute Manager Destroyed Device");
    }

@ -109,6 +117,7 @@ Manager::~Manager()
    if (this->mDebugReportCallback) {
        this->mInstance->destroyDebugReportCallbackEXT(
          this->mDebugReportCallback, nullptr, this->mDebugDispatcher);
+        // mInstance must stay valid here: the mFreeInstance branch that
+        // follows still dereferences it to destroy the instance and is the
+        // place where the handle is reset.
        KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback");
    }
 #endif
@ -117,6 +126,7 @@ Manager::~Manager()
    if (this->mFreeInstance) {
        this->mInstance->destroy(
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mInstance = nullptr;
        KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
    }
 }
@ -207,7 +217,31 @@ Manager::createInstance()
 }

 void
-Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
+Manager::clear() {
+    // Removes the weak references whose objects have already been released
+    // from the three managed lists. The lists are only filled when
+    // mManageResources is set, so there is nothing to prune otherwise.
+    if (!this->mManageResources) {
+        return;
+    }
+
+    // Predicates take the weak_ptr by const reference so that no copy (and
+    // no control block update) is made for every inspected element.
+    this->mManagedTensors.erase(
+            std::remove_if(
+                begin(this->mManagedTensors),
+                end(this->mManagedTensors),
+                [](const std::weak_ptr<Tensor>& t) {return t.expired();}),
+            end(this->mManagedTensors));
+    this->mManagedAlgorithms.erase(
+            std::remove_if(
+                begin(this->mManagedAlgorithms),
+                end(this->mManagedAlgorithms),
+                [](const std::weak_ptr<Algorithm>& t) {return t.expired();}),
+            end(this->mManagedAlgorithms));
+    this->mManagedSequences.erase(
+            std::remove_if(
+                begin(this->mManagedSequences),
+                end(this->mManagedSequences),
+                [](const std::weak_ptr<Sequence>& t) {return t.expired();}),
+            end(this->mManagedSequences));
+}
+
+void
+Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t physicalDeviceIndex)
 {

    KP_LOG_DEBUG("Kompute Manager creating Device");
@ -215,7 +249,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
    if (this->mInstance == nullptr) {
        throw std::runtime_error("Kompute Manager instance is null");
    }
-    if (this->mPhysicalDeviceIndex < 0) {
+    if (physicalDeviceIndex < 0) {
        throw std::runtime_error(
          "Kompute Manager physical device index not provided");
    }
@ -226,7 +260,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
      this->mInstance->enumeratePhysicalDevices();

    vk::PhysicalDevice physicalDevice =
-      physicalDevices[this->mPhysicalDeviceIndex];
+      physicalDevices[physicalDeviceIndex];

    this->mPhysicalDevice =
      std::make_shared<vk::PhysicalDevice>(physicalDevice);
@ -235,7 +269,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
      physicalDevice.getProperties();

    KP_LOG_INFO("Using physical device index {} found {}",
-                this->mPhysicalDeviceIndex,
+                physicalDeviceIndex,
                physicalDeviceProperties.deviceName);

    if (!familyQueueIndices.size()) {
@ -321,7 +355,9 @@ Manager::tensor(
    std::shared_ptr<Tensor> tensor{
              new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) };

-    this->mManagedTensors.push_back(tensor);
+ 	if (this->mManageResources) {
+        this->mManagedTensors.push_back(tensor);
+    }

    return tensor;
 }
@ -345,7 +381,9 @@ Manager::algorithm(
                specializationConstants,
                pushConstants)};

-    this->mManagedAlgorithms.push_back(algorithm);
+ 	if (this->mManageResources) {
+        this->mManagedAlgorithms.push_back(algorithm);
+    }

    return algorithm;
 }
@ -362,7 +400,9 @@ Manager::sequence(uint32_t queueIndex)
                         this->mComputeQueues[queueIndex],
                         this->mComputeQueueFamilyIndices[queueIndex]) };

-    this->mManagedSequences.push_back(sq);
+ 	if (this->mManageResources) {
+        this->mManagedSequences.push_back(sq);
+    }

    return sq;
 }
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@ -4,12 +4,10 @@

 namespace kp {

-OpAlgoDispatch::OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
-           const std::shared_ptr<kp::Algorithm>& algorithm)
+OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm)
 {
    KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");

-    this->mTensors = tensors;
    this->mAlgorithm = algorithm;
 }

@ -24,7 +22,7 @@ OpAlgoDispatch::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
    KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");

    // Barrier to ensure the data is finished writing to buffer memory
-    for (std::shared_ptr<Tensor> tensor : this->mTensors) {
+    for (const std::shared_ptr<Tensor>& tensor : this->mAlgorithm->getTensors()) {
        tensor->recordBufferMemoryBarrier(
          commandBuffer,
          vk::AccessFlagBits::eHostWrite,
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@ -61,6 +61,12 @@ Sequence::end()
    }
 }

+/**
+ * Clear entry point of the sequence; currently it only forwards to end().
+ * NOTE(review): the header documents clear() as "clears all operations
+ * currently recorded and starts recording again" - confirm that end() alone
+ * provides that behaviour.
+ */
+void
+Sequence::clear() {
+    KP_LOG_DEBUG("Kompute Sequence  calling clear");
+    this->end();
+}
+
 std::shared_ptr<Sequence>
 Sequence::eval()
 {
@ -69,6 +75,13 @@ Sequence::eval()
    return this->evalAsync()->evalAwait();
 }

+/**
+ * Convenience overload that calls clear(), records the provided operation
+ * and then evaluates the sequence through eval().
+ *
+ * @param op Operation to record and evaluate
+ * @return The shared_ptr<Sequence> returned by eval()
+ */
+std::shared_ptr<Sequence>
+Sequence::eval(std::shared_ptr<OpBase> op) {
+    this->clear();
+    this->record(op);
+    // The result has to be returned explicitly: flowing off the end of a
+    // function with a non-void return type is undefined behaviour.
+    return this->eval();
+}
+
 std::shared_ptr<Sequence>
 Sequence::evalAsync()
 {
@ -138,8 +151,16 @@ Sequence::isRecording()
    return this->mRecording;
 }

+/**
+ * Reports whether the device, command pool, command buffer and compute
+ * queue handles of the sequence are all set.
+ *
+ * @return true only if none of the four handles is empty
+ */
+bool
+Sequence::isInit() {
+    const bool anyHandleMissing = !this->mDevice ||
+                                  !this->mCommandPool ||
+                                  !this->mCommandBuffer ||
+                                  !this->mComputeQueue;
+    return !anyHandleMissing;
+}
+
 void
-Sequence::freeMemoryDestroyGPUResources()
+Sequence::destroy()
 {
    KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");

@ -189,6 +210,16 @@ Sequence::freeMemoryDestroyGPUResources()
        this->mOperations.clear();
    }

+    if (this->mDevice) {
+        this->mDevice = nullptr;
+    }
+    if (this->mPhysicalDevice) {
+        this->mPhysicalDevice = nullptr;
+    }
+    if (this->mComputeQueue) {
+        this->mComputeQueue = nullptr;
+    }
+
 }

 std::shared_ptr<Sequence>
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -76,6 +76,15 @@ Tensor::tensorType()
    return this->mTensorType;
 }

+/**
+ * Reports whether the tensor currently holds its device handle together
+ * with both the primary and the staging buffer/memory pairs.
+ *
+ * @return true only if all five handles are set
+ */
+bool
+Tensor::isInit() {
+    const bool hasDevice = static_cast<bool>(this->mDevice);
+    const bool hasPrimary = this->mPrimaryBuffer && this->mPrimaryMemory;
+    const bool hasStaging = this->mStagingBuffer && this->mStagingMemory;
+    return hasDevice && hasPrimary && hasStaging;
+}
+
 void
 Tensor::setData(const std::vector<float>& data)
 {
@ -429,7 +438,7 @@ Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
 }

 void
-Tensor::freeMemoryDestroyGPUResources()
+Tensor::destroy()
 {
    KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()");

@ -495,6 +504,10 @@ Tensor::freeMemoryDestroyGPUResources()
        }
    }

+    if (this->mDevice) {
+        this->mDevice = nullptr;
+    }
+
    KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()");
 }

--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@ -45,10 +45,6 @@ public:
            const Constants& specializationConstants = {},
            const Constants& pushConstants = {});

-    bool isInit();
-
-    void freeMemoryDestroyGPUResources();
-
    /**
     * Destructor for Algorithm which is responsible for freeing and destroying
     * respective pipelines and owned parameter groups.
@ -65,11 +61,21 @@ public:
     */
    void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);

+    bool isInit();
+
    void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);

+    const Workgroup& getWorkgroup();
+    const Constants& getSpecializationConstants();
+    const Constants& getPushConstants();
+    const std::vector<std::shared_ptr<Tensor>>& getTensors();
+
+    void destroy();
+
 private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::Device> mDevice;
+    std::vector<std::shared_ptr<Tensor>> mTensors;

    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
@ -100,7 +106,7 @@ private:
    void createPipeline();

    // Parameters
-    void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
+    void createParameters();
 };

 } // End namespace kp
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@ -30,6 +30,8 @@ class Manager
     * they would like to create the resources on.
     *
     * @param physicalDeviceIndex The index of the physical device to use
+     * @param manageResources (Optional) Whether to manage the memory of the
+     * resources created and destroy when the manager is destroyed.
     * @param familyQueueIndices (Optional) List of queue indices to add for
     * explicit allocation
     * @param totalQueues The total number of compute queues to create.
@ -48,8 +50,7 @@ class Manager
     */
    Manager(std::shared_ptr<vk::Instance> instance,
            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-            std::shared_ptr<vk::Device> device,
-            uint32_t physicalDeviceIndex);
+            std::shared_ptr<vk::Device> device);

    /**
     * Manager destructor which would ensure all owned resources are destroyed
@ -92,12 +93,14 @@ class Manager
            const Constants& specializationConstants = {},
            const Constants& pushConstants = {});

+    void destroy();
+    void clear();
+
  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::Instance> mInstance = nullptr;
    bool mFreeInstance = false;
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    uint32_t mPhysicalDeviceIndex = -1;
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;

@ -109,7 +112,7 @@ class Manager
    std::vector<uint32_t> mComputeQueueFamilyIndices;
    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

-    uint32_t mCurrentSequenceIndex = -1;
+    bool mManageResources = false;

 #if DEBUG
 #ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
@ -120,7 +123,7 @@ class Manager

    // Create functions
    void createInstance();
-    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
+    // physicalDeviceIndex selects the entry of the instance's enumerated
+    // physical devices that the device is created on.
+    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0);
 };

 } // End namespace kp
--- a/src/include/kompute/Sequence.hpp
+++ b/src/include/kompute/Sequence.hpp
@ -31,6 +31,10 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    ~Sequence();

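+    /**
+     * Records an already constructed operation into this sequence.
+     *
+     * @param op Operation (derived from OpBase) to record
+     * @return shared_ptr<Sequence> which can be used to chain further calls
+     */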
+    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
+
    /**
     * Record function for operation to be added to the GPU queue in batch. This
     * template requires classes to be derived from the OpBase class. This
@ -41,7 +45,148 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     */
-    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        // Compile-time guard: T has to be an operation type.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence record(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence record function started");
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+
+        // Build the operation from the tensors plus any forwarded extras,
+        // then hand it over to the non-template record overload.
+        std::shared_ptr<T> operation(
+            new T(tensors, std::forward<TArgs>(params)...));
+        return this->record(operation);
+    }
+    // Overload of the templated record for operations built from an Algorithm
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        // Compile-time guard: T has to be an operation type.
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence record(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence record function started");
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+
+        // Build the operation from the algorithm plus any forwarded extras,
+        // then hand it over to the non-template record overload.
+        std::shared_ptr<T> operation(
+            new T(algorithm, std::forward<TArgs>(params)...));
+        return this->record(operation);
+    }
+
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> eval();
+
+    /**
+     * Eval overload that takes a single operation. The templated eval
+     * overloads construct their operation and forward it to this function.
+     * NOTE(review): presumably records the operation and then evaluates the
+     * sequence -- confirm against Sequence.cpp.
+     *
+     * @param op Operation (derived from OpBase) to evaluate
+     * @return shared_ptr<Sequence>, presumably of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
+
+    /**
+     * Builds an operation of type T from the provided tensors (plus any extra
+     * constructor arguments) and passes it to eval(std::shared_ptr<OpBase>).
+     * T must derive from OpBase, which is enforced at compile time.
+     *
+     * @param tensors Tensors passed as first argument to the constructor of T
+     * @param params Further arguments forwarded to the constructor of T
+     * @return The shared_ptr<Sequence> returned by eval(std::shared_ptr<OpBase>)
+     */
+    // TODO: Aim to have only a single function with tensors/algorithm
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence eval function started");
+
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence eval(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(tensors, std::forward<TArgs>(params)...) };
+
+        // TODO: Aim to be able to handle errors when returning without throw except
+        return this->eval(op);
+    }
+    // Algorithm overload of the templated eval: builds an operation of type T
+    // from the algorithm (plus any forwarded constructor arguments) and passes
+    // it to eval(std::shared_ptr<OpBase>).
+    // Needed as otherwise it's not possible to use initializer lists
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence eval function started");
+
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence eval(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(algorithm, std::forward<TArgs>(params)...) };
+
+        return this->eval(op);
+    }
+
+    /**
+     * Eval Async sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier. EvalAwait() must
+     * be called after to ensure the sequence is terminated correctly.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> evalAsync();
+
+    /**
+     * Builds an operation of type T from the provided tensors (plus any extra
+     * constructor arguments) and passes it to the evalAsync overload that
+     * takes an operation. T must derive from OpBase, which is enforced at
+     * compile time.
+     * NOTE(review): no evalAsync(std::shared_ptr<OpBase>) declaration is
+     * visible in this part of the header -- confirm that overload exists.
+     *
+     * @param tensors Tensors passed as first argument to the constructor of T
+     * @param params Further arguments forwarded to the constructor of T
+     * @return The shared_ptr<Sequence> returned by the forwarded evalAsync call
+     */
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence evalAsync function started");
+
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence evalAsync(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(tensors, std::forward<TArgs>(params)...) };
+
+        return this->evalAsync(op);
+    }
+    // Algorithm overload of the templated evalAsync: builds an operation of
+    // type T from the algorithm (plus any forwarded constructor arguments) and
+    // passes it to the evalAsync overload that takes an operation.
+    // NOTE(review): no evalAsync(std::shared_ptr<OpBase>) declaration is
+    // visible in this part of the header -- confirm that overload exists.
+    // Needed as otherwise it's not possible to use initializer lists
+    template<typename T, typename... TArgs>
+    std::shared_ptr<Sequence>
+    evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
+    {
+        KP_LOG_DEBUG("Kompute Sequence evalAsync function started");
+
+        static_assert(std::is_base_of<OpBase, T>::value,
+                      "Kompute Sequence evalAsync(...) template only valid with "
+                      "OpBase derived classes");
+
+        KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
+        std::shared_ptr<T> op{
+            new T(algorithm, std::forward<TArgs>(params)...) };
+
+        return this->evalAsync(op);
+    }
+
+    /**
+     * Eval Await waits for the fence to finish processing and then once it
+     * finishes, it runs the postEval of all operations.
+     *
+     * @param waitFor Number of milliseconds to wait before timing out.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
+    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);

    /**
     * Clear function clears all operations currently recorded and starts recording again.
@ -64,32 +209,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    void end();

-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> eval();
-
-    /**
-     * Eval Async sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier. EvalAwait() must
-     * be called after to ensure the sequence is terminated correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync();
-
-    /**
-     * Eval Await waits for the fence to finish processing and then once it
-     * finishes, it runs the postEval of all operations.
-     *
-     * @param waitFor Number of milliseconds to wait before timing out.
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
-
    /**
     * Returns true if the sequence is currently in recording activated.
     *
@ -97,6 +216,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     */
    bool isRecording();

+
+    bool isInit();
+
    /**
     * Returns true if the sequence is currently running - mostly used for async
     * workloads.
@ -109,7 +231,7 @@ class Sequence: public std::enable_shared_from_this<Sequence>
     * Destroys and frees the GPU resources which include the buffer and memory
     * and sets the sequence as init=False.
     */
-    void freeMemoryDestroyGPUResources();
+    void destroy();

  private:
    // -------------- NEVER OWNED RESOURCES
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@ -59,7 +59,9 @@ class Tensor
    /**
     * Destroys and frees the GPU resources which include the buffer and memory.
     */
-    void freeMemoryDestroyGPUResources();
+    void destroy();
+
+    bool isInit();

    /**
     * Returns the vector of data currently contained by the Tensor. It is
--- a/src/include/kompute/operations/OpAlgoDispatch.hpp
+++ b/src/include/kompute/operations/OpAlgoDispatch.hpp
@ -17,8 +17,7 @@ class OpAlgoDispatch : public OpBase
 {
  public:

-    OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
-           const std::shared_ptr<kp::Algorithm>& algorithm);
+    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);

    /**
     * Default destructor, which is in charge of destroying the algorithm
@ -50,7 +49,6 @@ class OpAlgoDispatch : public OpBase

 private:
    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<std::shared_ptr<Tensor>> mTensors;
    std::shared_ptr<Algorithm> mAlgorithm;
 };

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -11,8 +11,7 @@ else()
 endif()

 file(GLOB test_kompute_CPP
-    "${CMAKE_CURRENT_SOURCE_DIR}/TestMain.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/TestWorkgroup.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
 )

 add_executable(test_kompute ${test_kompute_CPP})
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@ -37,25 +37,32 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
        }
    )");

+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
    std::vector<float> data(size, 0.0);
    std::vector<float> resultSync(size, 100000000);
    std::vector<float> resultAsync(size, 100000000);

    kp::Manager mgr;

+    std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+
    std::vector<std::shared_ptr<kp::Tensor>> inputsSyncB;
+    std::vector<std::shared_ptr<kp::Algorithm>> algorithms;

    for (uint32_t i = 0; i < numParallel; i++) {
-        inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
+        inputsSyncB.push_back(mgr.tensor(data));
+        algorithms.push_back(mgr.algorithm({ inputsSyncB[i] }, spirv));
    }

-    mgr.rebuild(inputsSyncB);
+    sq->eval<kp::OpTensorSyncDevice>(inputsSyncB);
+
+    mgr.sequence()->eval<kp::OpTensorSyncDevice>(inputsSyncB);

    auto startSync = std::chrono::high_resolution_clock::now();

    for (uint32_t i = 0; i < numParallel; i++) {
-        mgr.evalOpDefault<kp::OpAlgoCreate>(
-          { inputsSyncB[i] }, kp::Shader::compile_source(shader));
+        sq->eval<kp::OpAlgoDispatch>(algorithms[i]);
    }

    auto endSync = std::chrono::high_resolution_clock::now();
@ -63,7 +70,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
      std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync)
        .count();

-    mgr.evalOpDefault<kp::OpTensorSyncLocal>(inputsSyncB);
+    sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);

    for (uint32_t i = 0; i < numParallel; i++) {
        EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
@ -74,26 +81,23 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
    std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;

    for (uint32_t i = 0; i < numParallel; i++) {
-        inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
+        inputsAsyncB.push_back(mgr.tensor(data));
    }

-    mgrAsync.rebuild(inputsAsyncB);
+    std::vector<std::shared_ptr<kp::Sequence>> sqs;

    for (uint32_t i = 0; i < numParallel; i++) {
-        mgrAsync.sequence("async" + std::to_string(i), i);
+        sqs.push_back(mgrAsync.sequence(i));
    }

    auto startAsync = std::chrono::high_resolution_clock::now();

    for (uint32_t i = 0; i < numParallel; i++) {
-        mgrAsync.evalOpAsync<kp::OpAlgoCreate>(
-          { inputsAsyncB[i] },
-          "async" + std::to_string(i),
-          kp::Shader::compile_source(shader));
+        sqs[i]->evalAsync<kp::OpAlgoDispatch>(algorithms[i]);
    }

    for (uint32_t i = 0; i < numParallel; i++) {
-        mgrAsync.evalOpAwait("async" + std::to_string(i));
+        sqs[i]->evalAwait();
    }

    auto endAsync = std::chrono::high_resolution_clock::now();
@ -101,7 +105,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
                           endAsync - startAsync)
                           .count();

-    mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ inputsAsyncB });
+    sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });

    for (uint32_t i = 0; i < numParallel; i++) {
        EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
@ -138,32 +142,32 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
        }
    )");

+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
    std::vector<float> data(size, 0.0);
    std::vector<float> resultAsync(size, 100000000);

    kp::Manager mgr;

-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
+    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);

-    mgr.sequence("asyncOne");
-    mgr.sequence("asyncTwo");
+    std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
+    std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();

-    mgr.rebuild({ tensorA, tensorB });
+    // Replaces the former mgr.rebuild(...): sync the tensors to the device
+    // before the algorithms are dispatched on them.
+    sq1->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB });

-    std::vector<uint32_t> result = kp::Shader::compile_source(shader);
+    // Both algorithms run the shader compiled into `spirv` above.
+    std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({ tensorA }, spirv);
+    std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({ tensorB }, spirv);

-    mgr.evalOpAsync<kp::OpAlgoCreate>(
-      { tensorA }, "asyncOne", kp::Shader::compile_source(shader));
+    sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
+    sq2->evalAsync<kp::OpAlgoDispatch>(algo2);

-    mgr.evalOpAsync<kp::OpAlgoCreate>(
-      { tensorB }, "asyncTwo", kp::Shader::compile_source(shader));
+    sq1->evalAwait();
+    sq2->evalAwait();

-    mgr.evalOpAwait("asyncOne");
-    mgr.evalOpAwait("asyncTwo");
-
-    mgr.evalOpAsyncDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
-    mgr.evalOpAwaitDefault();
+    sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
+    sq1->evalAwait();

    EXPECT_EQ(tensorA->data(), resultAsync);
    EXPECT_EQ(tensorB->data(), resultAsync);
--- a/test/TestDestroy.cpp
+++ b/test/TestDestroy.cpp
@ -5,7 +5,7 @@

 TEST(TestDestroy, TestDestroyTensorSingle)
 {
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    std::shared_ptr<kp::Tensor> tensorA = nullptr;

    std::string shader(R"(
      #version 450
@ -16,37 +16,36 @@ TEST(TestDestroy, TestDestroyTensorSingle)
          pa[index] = pa[index] + 1;
      })");

+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
    {
        std::shared_ptr<kp::Sequence> sq = nullptr;

        {
            kp::Manager mgr;

-            mgr.rebuild({ tensorA });
+            tensorA = mgr.tensor({ 0, 0, 0 });

-            sq = mgr.sequence();
+            std::shared_ptr<kp::Algorithm> algo =
+                mgr.algorithm({ tensorA }, spirv);

-            sq->begin();
-            sq->record<kp::OpAlgoCreate>(
-              { tensorA }, kp::Shader::compile_source(shader));
-            sq->end();
-
-            sq->eval();
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy(tensorA);
+            mgr.sequence()
+                ->record<kp::OpAlgoDispatch>(algo)
+                ->eval()
+                ->eval<kp::OpTensorSyncLocal>(algo->getTensors());

+            tensorA->destroy();
            EXPECT_FALSE(tensorA->isInit());
        }
+        EXPECT_FALSE(tensorA->isInit());
    }
    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
 }

 TEST(TestDestroy, TestDestroyTensorVector)
 {
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
-    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
+    std::shared_ptr<kp::Tensor> tensorA = nullptr;
+    std::shared_ptr<kp::Tensor> tensorB = nullptr;

    std::string shader(R"(
      #version 450
@ -58,6 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
          pa[index] = pa[index] + 1;
          pb[index] = pb[index] + 2;
      })");
+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);

    {
        std::shared_ptr<kp::Sequence> sq = nullptr;
@ -65,20 +65,20 @@ TEST(TestDestroy, TestDestroyTensorVector)
        {
            kp::Manager mgr;

-            mgr.rebuild({ tensorA, tensorB });
+            tensorA = mgr.tensor({ 1, 1, 1 });
+            tensorB = mgr.tensor({ 1, 1, 1 });

-            sq = mgr.sequence();
+            std::shared_ptr<kp::Algorithm> algo = 
+                mgr.algorithm({tensorA, tensorB}, spirv);

-            sq->begin();
-            sq->record<kp::OpAlgoCreate>(
-              { tensorA, tensorB }, kp::Shader::compile_source(shader));
-            sq->end();
+            // Sync to the device, run the shader, then sync the results back
+            // to the host so the data() checks after destroy see them.
+            mgr.sequence()
+                ->record<kp::OpTensorSyncDevice>(algo->getTensors())
+                ->record<kp::OpAlgoDispatch>(algo)
+                ->record<kp::OpTensorSyncLocal>(algo->getTensors())
+                ->eval();

-            sq->eval();
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
-
-            mgr.destroy({ tensorA, tensorB });
+            tensorA->destroy();
+            tensorB->destroy();

            EXPECT_FALSE(tensorA->isInit());
            EXPECT_FALSE(tensorB->isInit());
@ -88,32 +88,9 @@ TEST(TestDestroy, TestDestroyTensorVector)
    EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
 }

-TEST(TestDestroy, TestDestroyTensorVectorUninitialised)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
-    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
-
-    {
-        std::shared_ptr<kp::Sequence> sq = nullptr;
-
-        {
-            kp::Manager mgr;
-
-            mgr.rebuild({ tensorA, tensorB });
-
-            mgr.destroy({ tensorA, tensorB });
-
-            EXPECT_FALSE(tensorA->isInit());
-            EXPECT_FALSE(tensorB->isInit());
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
-}
-
 TEST(TestDestroy, TestDestroySequenceSingle)
 {
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+    std::shared_ptr<kp::Tensor> tensorA = nullptr;

    std::string shader(R"(
      #version 450
@ -124,26 +101,21 @@ TEST(TestDestroy, TestDestroySequenceSingle)
          pa[index] = pa[index] + 1;
      })");

+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
    {
        std::shared_ptr<kp::Sequence> sq = nullptr;

        {
            kp::Manager mgr;

-            mgr.rebuild({ tensorA });
+            tensorA = mgr.tensor({0, 0, 0});

-            sq = mgr.sequence();
-
-            sq->begin();
-            sq->record<kp::OpAlgoCreate>(
-              { tensorA }, kp::Shader::compile_source(shader));
-            sq->end();
-
-            sq->eval();
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy(sq);
+            // Keep a handle on the sequence: it is dereferenced by the
+            // isInit() check below and must not be left as nullptr.
+            sq = mgr.sequence();
+            sq->record<kp::OpTensorSyncDevice>({tensorA})
+                ->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
+                ->record<kp::OpTensorSyncLocal>({tensorA})
+                ->eval();
+
+            // Replaces the former mgr.destroy(sq) so the check below is
+            // meaningful.
+            sq->destroy();

            EXPECT_FALSE(sq->isInit());
        }
@ -151,220 +123,3 @@ TEST(TestDestroy, TestDestroySequenceSingle)
    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
 }

-TEST(TestDestroy, TestDestroySequenceVector)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        std::shared_ptr<kp::Sequence> sq1 = nullptr;
-        std::shared_ptr<kp::Sequence> sq2 = nullptr;
-
-        {
-            kp::Manager mgr;
-
-            mgr.rebuild({ tensorA });
-
-            sq1 = mgr.sequence("One");
-            sq1->begin();
-            sq1->record<kp::OpAlgoCreate>(
-              { tensorA }, kp::Shader::compile_source(shader));
-            sq1->end();
-            sq1->eval();
-
-            sq2 = mgr.sequence("Two");
-            sq2->begin();
-            sq2->record<kp::OpAlgoCreate>(
-              { tensorA }, kp::Shader::compile_source(shader));
-            sq2->end();
-            sq2->eval();
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy({ sq1, sq2 });
-
-            EXPECT_FALSE(sq1->isInit());
-            EXPECT_FALSE(sq2->isInit());
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
-}
-
-TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        kp::Manager mgr;
-        {
-            mgr.rebuild({ tensorA });
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "one",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "two",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy("one");
-            mgr.destroy("two");
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
-}
-
-TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        std::shared_ptr<kp::Sequence> sq1 = nullptr;
-
-        {
-            kp::Manager mgr;
-
-            mgr.rebuild({ tensorA });
-
-            sq1 = mgr.sequence("One");
-            sq1->begin();
-            sq1->record<kp::OpAlgoCreate>(
-              { tensorA }, kp::Shader::compile_source(shader));
-            sq1->end();
-            sq1->eval();
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy("One");
-
-            EXPECT_FALSE(sq1->isInit());
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
-}
-
-TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        kp::Manager mgr;
-        {
-            mgr.rebuild({ tensorA });
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "one",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "two",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy(std::vector<std::string>({"one", "two"}));
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
-}
-
-TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        kp::Manager mgr;
-        {
-            mgr.rebuild({ tensorA });
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "one",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOp<kp::OpAlgoCreate>(
-              { tensorA }, "two",
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy(std::vector<std::string>({"one", "two"}));
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
-}
-
-TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
-{
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
-
-    std::string shader(R"(
-      #version 450
-      layout (local_size_x = 1) in;
-      layout(set = 0, binding = 0) buffer a { float pa[]; };
-      void main() {
-          uint index = gl_GlobalInvocationID.x;
-          pa[index] = pa[index] + 1;
-      })");
-
-    {
-        kp::Manager mgr;
-        {
-            mgr.rebuild({ tensorA });
-
-            mgr.evalOpDefault<kp::OpAlgoCreate>(
-              { tensorA },
-              kp::Shader::compile_source(shader));
-
-            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
-
-            mgr.destroy(KP_DEFAULT_SESSION);
-        }
-    }
-    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
-}
--- a/test/TestLogisticRegression.cpp
+++ b/test/TestLogisticRegression.cpp
@ -11,47 +11,40 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
    uint32_t ITERATIONS = 100;
    float learningRate = 0.1;

-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
-
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
-
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
-                                                        wIn, wOutI, wOutJ,
-                                                        bIn, bOut,  lOut };
-
    {
        kp::Manager mgr;

-        mgr.rebuild(params);
+        std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+        std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });

-        std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+        std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });

-        // Record op algo base
-        sq->begin();
+        std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
+        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });

-        sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
+        std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
+        std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });

-        sq->record<kp::OpAlgoCreate>(
-          params,
-          std::vector<uint32_t>(
+        std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+
+        std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
+                                                        wIn, wOutI, wOutJ,
+                                                        bIn, bOut,  lOut };
+
+        std::vector<uint32_t> spirv = std::vector<uint32_t>(
            (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
            (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
-              kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
-          kp::Workgroup(), kp::Constants({5.0}));
+              kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));

-        sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
+        std::shared_ptr<kp::Algorithm> algorithm =
+            mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));

-        sq->end();
+        std::shared_ptr<kp::Sequence> sq =
+            mgr.sequence()
+                ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+                ->record<kp::OpAlgoDispatch>(algorithm)
+                ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });

        // Iterate across all expected iterations
        for (size_t i = 0; i < ITERATIONS; i++) {
@ -64,21 +57,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
                bIn->data()[0] -= learningRate * bOut->data()[j];
            }
        }
+
+        // Based on the inputs the outputs should be at least:
+        // * wi < 0.01
+        // * wj > 1.0
+        // * b < 0
+        // TODO: Add EXPECT_DOUBLE_EQ instead
+        EXPECT_LT(wIn->data()[0], 0.01);
+        EXPECT_GT(wIn->data()[1], 1.0);
+        EXPECT_LT(bIn->data()[0], 0.0);
+
+        KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
+                    wIn->data()[0],
+                    wIn->data()[1],
+                    bIn->data()[0]);
    }
-
-    // Based on the inputs the outputs should be at least:
-    // * wi < 0.01
-    // * wj > 1.0
-    // * b < 0
-    // TODO: Add EXPECT_DOUBLE_EQ instead
-    EXPECT_LT(wIn->data()[0], 0.01);
-    EXPECT_GT(wIn->data()[1], 1.0);
-    EXPECT_LT(bIn->data()[0], 0.0);
-
-    KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
-                wIn->data()[0],
-                wIn->data()[1],
-                bIn->data()[0]);
 }

 TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
@ -87,50 +80,43 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
    uint32_t ITERATIONS = 100;
    float learningRate = 0.1;

-    kp::Constants wInVec = { 0.001, 0.001 };
-    std::vector<float> bInVec = { 0 };
-
-    std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
-    std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
-
-    std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
-
-    std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor(
-      wInVec, kp::Tensor::TensorTypes::eHost) };
-    std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-    std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor(
-      bInVec, kp::Tensor::TensorTypes::eHost) };
-    std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
-
-    std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
-                                                        wIn, wOutI, wOutJ,
-                                                        bIn, bOut,  lOut };
-
    {
        kp::Manager mgr;

-        mgr.rebuild(params);
+        std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
+        std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });

-        std::shared_ptr<kp::Sequence> sq = mgr.sequence();
+        std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });

-        // Record op algo base
-        sq->begin();
+        std::shared_ptr<kp::Tensor> wIn = mgr.tensor(
+                { 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
+        std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
+        std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });

-        sq->record<kp::OpAlgoCreate>(
-          params,
-          std::vector<uint32_t>(
+        std::shared_ptr<kp::Tensor> bIn = mgr.tensor(
+                { 0 },
+                kp::Tensor::TensorTypes::eHost);
+        std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+
+        std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
+
+        std::vector<std::shared_ptr<kp::Tensor>> params = { xI,  xJ,    y,
+                                                        wIn, wOutI, wOutJ,
+                                                        bIn, bOut,  lOut };
+
+        std::vector<uint32_t> spirv = std::vector<uint32_t>(
            (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
            (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
-              kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
-          kp::Workgroup(), kp::Constants({5.0}));
+              kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));

-        sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
+        std::shared_ptr<kp::Algorithm> algorithm =
+            mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));

-        sq->end();
+        std::shared_ptr<kp::Sequence> sq =
+            mgr.sequence()
+                ->record<kp::OpTensorSyncDevice>({ wIn, bIn })
+                ->record<kp::OpAlgoDispatch>(algorithm)
+                ->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });

        // Iterate across all expected iterations
        for (size_t i = 0; i < ITERATIONS; i++) {
@ -145,7 +131,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
            wIn->mapDataIntoHostMemory();
            bIn->mapDataIntoHostMemory();
        }
-    }

    // Based on the inputs the outputs should be at least:
    // * wi < 0.01
@ -160,4 +145,5 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
                wIn->data()[0],
                wIn->data()[1],
                bIn->data()[0]);
+    }
 }
--- a/test/TestWorkgroup.cpp
+++ b/test/TestWorkgroup.cpp
@ -3,9 +3,6 @@

 #include "kompute/Kompute.hpp"

-#include "kompute_test/shaders/shadertest_workgroup.hpp"
-
-
 TEST(TestWorkgroup, TestSimpleWorkgroup)
 {
    std::shared_ptr<kp::Tensor> tensorA = nullptr;
@ -31,9 +28,9 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
            std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(params, spirv, workgroup);

            sq = mgr.sequence();
-            sq->record(std::make_shared<kp::OpTensorSyncDevice>(params));
-            sq->record(std::make_shared<kp::OpAlgoDispatch>(params, algorithm));
-            sq->record(std::make_shared<kp::OpTensorSyncLocal>(params));
+            sq->record<kp::OpTensorSyncDevice>(params);
+            // OpAlgoDispatch is now constructed from the algorithm alone; the
+            // tensors were already given to mgr.algorithm(...) above.
+            sq->record<kp::OpAlgoDispatch>(algorithm);
+            sq->record<kp::OpTensorSyncLocal>(params);
            sq->eval();
        }
    }