Merge pull request #138 from EthicalML/136_memory_hierarchy_tensor_manager

Amend memory ownership hierarchy to have Tensor owned by Manager instead of OpCreateTensor / OpBase
2021-02-10 07:07:54 +00:00 · 2021-02-10 07:07:54 +00:00 · d5df5c1f41
commit d5df5c1f41
parent 698883992f 48f041d9f3
42 changed files with 507 additions and 576 deletions
--- a/.ccls
+++ b/.ccls
@ -13,6 +13,7 @@
 -DDEBUG=1
 -DKOMPUTE_INCLUDE_FOR_SYNTAX

+-I/usr/include/python3.6/
 -I./python/pybind11/include/
 -I./external/Vulkan-Headers/include/
 -I./external/googletest/googletest/include/
--- a/5
+++ b/5
@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests
 	./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS)


+#### PYTHON ####
+
+test_python:
+	python -m pytest -s --log-cli-level=DEBUG -v python/test/
+
 ####### Run CI Commands #######

 # This command uses act to replicate github action
--- a/README.md
+++ b/README.md
@ -54,9 +54,9 @@ int main() {
    kp::Manager mgr; 

    // 2. Create and initialise Kompute Tensors through manager
-    auto tensorInA = mgr.buildTensor({ 2., 2., 2. });
-    auto tensorInB = mgr.buildTensor({ 1., 2., 3. });
-    auto tensorOut = mgr.buildTensor({ 0., 0., 0. });
+    auto tensorInA = mgr.tensor({ 2., 2., 2. });
+    auto tensorInB = mgr.tensor({ 1., 2., 3. });
+    auto tensorOut = mgr.tensor({ 0., 0., 0. });

    // 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path)
    std::string shaderString = (R"(
--- a/docs/overview/advanced-examples.rst
+++ b/docs/overview/advanced-examples.rst
@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU.
       mgr.evalOpDefault<kp::OpCreateTensor>({tensorLHS, tensorRHS, tensorOutput});

       // Create a new sequence
-       std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.getOrCreateManagedSequence();
+       std::weak_ptr<kp::Sequence> sqWeakPtr = mgr.sequence();

       if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
       {
@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_.
       // We need to create explicit sequences with their respective queues
       // The second parameter is the index in the familyIndex array which is relative
       //      to the vector we created the manager with.
-       mgr.createManagedSequence("queueOne", 0);
-       mgr.createManagedSequence("queueTwo", 1);
+       mgr.sequence("queueOne", 0);
+       mgr.sequence("queueTwo", 1);

       // Creates tensor an initializes GPU memory (below we show more granularity)
       auto tensorA = std::make_shared<kp::Tensor>(kp::Tensor(std::vector<float>(10, 0.0)));
@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce
    kp::Manager mgr;

    if (std::shared_ptr<kp::Sequence> sq = 
-            mgr.getOrCreateManagedSequence("createTensors").lock()) 
+            mgr.sequence("createTensors").lock()) 
    {
        // ...

--- a/docs/overview/async-parallel.rst
+++ b/docs/overview/async-parallel.rst
@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue
    // We need to create explicit sequences with their respective queues
    // The second parameter is the index in the familyIndex array which is relative
    //      to the vector we created the manager with.
-    mgr.createManagedSequence("queueOne", 0);
-    mgr.createManagedSequence("queueTwo", 1);
+    mgr.sequence("queueOne", 0);
+    mgr.sequence("queueTwo", 1);

 We create the tensors without modifications.

--- a/docs/overview/reference.rst
+++ b/docs/overview/reference.rst
@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class.
 .. doxygenclass:: kp::OpMult
   :members:

-OpTensorCreate
-------
-
-The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU.
-
-.. image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg
-   :width: 100%
-
-.. doxygenclass:: kp::OpTensorCreate
-   :members:

 OpTensorCopy
 -------
--- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
+++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
@ -42,16 +42,9 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
        kp::Manager mgr;

        {
+            mgr.rebuild(params);

-            std::shared_ptr<kp::Sequence> sqTensor =
-              mgr.createManagedSequence();
-
-            sqTensor->begin();
-            sqTensor->record<kp::OpTensorCreate>(params);
-            sqTensor->end();
-            sqTensor->eval();
-
-            std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+            std::shared_ptr<kp::Sequence> sq = mgr.sequence();

            // Record op algo base
            sq->begin();
--- a/examples/array_multiplication/src/Main.cpp
+++ b/examples/array_multiplication/src/Main.cpp
@ -14,9 +14,9 @@ int main()

    kp::Manager mgr;

-    auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
-    auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
-    auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
+    auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 });
+    auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 });
+    auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 });

 #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING
    std::string shader(R"(
--- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
+++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp
@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) {
    // Set the new data in the local device
    this->mSecondaryTensor->setData({value});
    // Execute recorded sequence
-    if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock()) {
+    if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
        sq->eval();
    }
    else {
@ -29,12 +29,12 @@ float KomputeSummatorNode::get_total() const {

 void KomputeSummatorNode::_init() {
    std::cout << "CALLING INIT" << std::endl;
-    this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
-    this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
-    this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
+    this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
+    this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
+    this->mSequence = this->mManager.sequence("AdditionSeq");

    // We now record the steps in the sequence
-    if (std::shared_ptr<kp::Sequence> sq = this->mSequence.lock())
+    if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
    {

        std::string shader(R"(
@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() {
                { this->mSecondaryTensor });

        // Then we run the operation with both tensors
-        sq->record<kp::OpAlgoBase<>>(
+        sq->record<kp::OpAlgoBase>(
            { this->mPrimaryTensor, this->mSecondaryTensor }, 
            std::vector<char>(shader.begin(), shader.end()));

--- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp
+++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp
@ -28,9 +28,9 @@ float KomputeSummator::get_total() const {

 void KomputeSummator::_init() {
    std::cout << "CALLING INIT" << std::endl;
-    this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 });
-    this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 });
-    this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq");
+    this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
+    this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
+    this->mSequence = this->mManager.sequence("AdditionSeq");

    // We now record the steps in the sequence
    {
--- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp
+++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp
@ -50,15 +50,10 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
    {
        kp::Manager mgr;

-            std::shared_ptr<kp::Sequence> sqTensor =
-              mgr.createManagedSequence();
+        mgr.rebuild(params);

-            sqTensor->begin();
-            sqTensor->record<kp::OpTensorCreate>(params);
-            sqTensor->end();
-            sqTensor->eval();
-
-            std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+        {
+            std::shared_ptr<kp::Sequence> sq = mgr.sequence();

            // Record op algo base
            sq->begin();
--- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp
+++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp
@ -55,15 +55,9 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
        kp::Manager mgr;

        {
-            std::shared_ptr<kp::Sequence> sqTensor =
-              mgr.createManagedSequence();
+            mgr.rebuild(params);

-            sqTensor->begin();
-            sqTensor->record<kp::OpTensorCreate>(params);
-            sqTensor->end();
-            sqTensor->eval();
-
-            std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+            std::shared_ptr<kp::Sequence> sq = mgr.sequence();

            // Record op algo base
            sq->begin();
--- a/examples/logistic_regression/src/Main.cpp
+++ b/examples/logistic_regression/src/Main.cpp
@ -35,15 +35,9 @@ int main()

    kp::Manager mgr;

-    std::shared_ptr<kp::Sequence> sqTensor =
-      mgr.createManagedSequence();
+    mgr.rebuild(params);

-    sqTensor->begin();
-    sqTensor->record<kp::OpTensorCreate>(params);
-    sqTensor->end();
-    sqTensor->eval();
-
-    std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+    std::shared_ptr<kp::Sequence> sq = mgr.sequence();

    // Record op algo base
    sq->begin();
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@ -119,7 +119,7 @@ integrate with the vulkan kompute use.
@param device Vulkan logical device to use for all base resources
@param physicalDeviceIndex Index for vulkan physical device used)doc";

-static const char *__doc_kp_Manager_buildTensor =
+static const char *__doc_kp_Manager_tensor =
 R"doc(Function that simplifies the common workflow of tensor creation and
 initialization. It will take the constructor parameters for a Tensor
 and will will us it to create a new Tensor and then create it using
@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc";

 static const char *__doc_kp_Manager_createInstance = R"doc()doc";

-static const char *__doc_kp_Manager_createManagedSequence =
-R"doc(Create a new managed Kompute sequence so it's available within the
-manager.
-
-@param sequenceName The name for the named sequence to be created, if
-empty then default indexed value is used @param queueIndex The queue
-to use from the available queues @return Weak pointer to the manager
-owned sequence resource)doc";
-
 static const char *__doc_kp_Manager_evalOp =
 R"doc(Function that evaluates operation against named sequence.

@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence.
 TArgs Template parameters that will be used to initialise Operation to
 allow for extensible configurations on initialisation)doc";

-static const char *__doc_kp_Manager_getOrCreateManagedSequence =
+static const char *__doc_kp_Manager_sequence =
 R"doc(Get or create a managed Sequence that will be contained by this
 manager. If the named sequence does not currently exist, it would be
 created and initialised.
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) {
        .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")
        
        // record
-        .def("record_tensor_create", &kp::Sequence::record<kp::OpTensorCreate>,
-            "Records operation to create and initialise tensor GPU memory and buffer")
        .def("record_tensor_copy", &kp::Sequence::record<kp::OpTensorCopy>,
            "Records operation to copy one tensor to one or many tensors")
        .def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
@ -157,11 +155,16 @@ PYBIND11_MODULE(kp, m) {
            [](uint32_t physicalDeviceIndex, const std::vector<uint32_t>& familyQueueIndices) {
                return std::unique_ptr<kp::Manager>(new kp::Manager(physicalDeviceIndex, familyQueueIndices));
            }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.")
-        .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name")
-        .def("create_sequence", &kp::Manager::createManagedSequence,
-                py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues")
-        .def("build_tensor", &kp::Manager::buildTensor, 
-                py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice,
+        .def("sequence", &kp::Manager::sequence,
+                py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues")
+        .def("tensor", &kp::Manager::tensor, 
+                py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true,
+                "Build and initialise tensor")
+        .def("rebuild", py::overload_cast<std::vector<std::shared_ptr<kp::Tensor>>, bool>(&kp::Manager::rebuild),
+                py::arg("tensors"), py::arg("syncDataToGPU") = true,
+                "Build and initialise list of tensors")
+        .def("rebuild", py::overload_cast<std::shared_ptr<kp::Tensor>, bool>(&kp::Manager::rebuild),
+                py::arg("tensor"), py::arg("syncDataToGPU") = true,
                "Build and initialise tensor")
        
        // Await functions
@ -172,8 +175,6 @@ PYBIND11_MODULE(kp, m) {
                py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created")
        
        // eval default
-        .def("eval_tensor_create_def", &kp::Manager::evalOpDefault<kp::OpTensorCreate>,
-            "Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence")
        .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault<kp::OpTensorCopy>,
            "Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence")
        .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncDevice>,
@ -209,8 +210,6 @@ PYBIND11_MODULE(kp, m) {
            "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence")
        
        // eval
-        .def("eval_tensor_create", &kp::Manager::evalOp<kp::OpTensorCreate>,
-            "Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
        .def("eval_tensor_copy", &kp::Manager::evalOp<kp::OpTensorCopy>,
            "Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence")
        .def("eval_tensor_sync_device", &kp::Manager::evalOp<kp::OpTensorSyncDevice>,
@ -249,8 +248,6 @@ PYBIND11_MODULE(kp, m) {
            "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence")
        
        // eval async default
-        .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCreate>,
-            "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence")
        .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCopy>,
            "Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence")
        .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncDevice>,
@ -286,8 +283,6 @@ PYBIND11_MODULE(kp, m) {
            "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence")
        
        // eval async
-        .def("eval_async_tensor_create", &kp::Manager::evalOpAsync<kp::OpTensorCreate>,
-            "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
        .def("eval_async_tensor_copy", &kp::Manager::evalOpAsync<kp::OpTensorCopy>,
            "Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence")
        .def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync<kp::OpTensorSyncDevice>,
--- a/python/test/test_array_multiplication.py
+++ b/python/test/test_array_multiplication.py
@ -14,7 +14,7 @@ def test_array_multiplication():
    tensor_out = kp.Tensor([0, 0, 0])

    # 3. Initialise the Kompute Tensors in the GPU
-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    # 4. Define the multiplication shader code to run on the GPU
    @ps.python2shader
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@ -2,6 +2,7 @@ import os

 import kp
 import numpy as np
+import logging

 DIRNAME = os.path.dirname(os.path.abspath(__file__))

@ -16,7 +17,7 @@ def test_opmult():

    mgr = kp.Manager()

-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out])

@ -41,7 +42,7 @@ def test_opalgobase_data():

        layout (local_size_x = 1) in;

-        // The input tensors bind index is relative to index in parameter passed
+        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) buffer bina { float tina[]; };
        layout(set = 0, binding = 1) buffer binb { float tinb[]; };
        layout(set = 0, binding = 2) buffer bout { float tout[]; };
@ -52,7 +53,7 @@ def test_opalgobase_data():
        }
    """

-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData)

@ -75,7 +76,7 @@ def test_opalgobase_file():

    shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")

-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)

@ -93,14 +94,14 @@ def test_sequence():
    tensor_in_b = kp.Tensor([1, 2, 3])
    tensor_out = kp.Tensor([0, 0, 0])

-    mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])
+    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
    mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)

    mgr.eval_await_def()

-    seq = mgr.create_sequence("op")
+    seq = mgr.sequence("op")
    seq.begin()
    seq.record_tensor_sync_local([tensor_in_a])
    seq.record_tensor_sync_local([tensor_in_b])
@ -118,32 +119,35 @@ def test_workgroup():

    tensor_a = kp.Tensor(np.zeros([16,8]))
    tensor_b = kp.Tensor(np.zeros([16,8]))
-    mgr.eval_tensor_create_def([tensor_a, tensor_b])
+
+    mgr.rebuild([tensor_a, tensor_b])

    shader_src = """
        #version 450

        layout (local_size_x = 1) in;

-        // The input tensors bind index is relative to index in parameter passed
+        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) writeonly buffer bout  { float toutx[]; };
        layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; };

        void main() {
            uint index   = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y;
-            
+
            toutx[index] = gl_GlobalInvocationID.x;
            touty[index] = gl_GlobalInvocationID.y;
        }
    """
    shader_src = bytes(shader_src, encoding='utf8')

-    seq = mgr.create_sequence()
+    seq = mgr.sequence("new")
    seq.begin()
    seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1))
    seq.end()
    seq.eval()
-    
+
    mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])
+
    assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel())
    assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel())
+
--- a/python/test/test_logistic_regression.py
+++ b/python/test/test_logistic_regression.py
@ -66,10 +66,10 @@ def test_logistic_regression():
    params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
        tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]

-    mgr.eval_tensor_create_def(params)
+    mgr.rebuild(params)

    # Create a managed sequence
-    sq = mgr.create_sequence()
+    sq = mgr.sequence()

    # Clear previous operations and begin recording for new operations
    sq.begin()
--- a/single_include/AggregateHeaders.cpp
+++ b/single_include/AggregateHeaders.cpp
@ -8,7 +8,6 @@
 #include "kompute/operations/OpAlgoBase.hpp"
 #include "kompute/operations/OpAlgoLhsRhsOut.hpp"
 #include "kompute/operations/OpMult.hpp"
-#include "kompute/operations/OpTensorCreate.hpp"
 #include "kompute/operations/OpTensorCopy.hpp"
 #include "kompute/operations/OpTensorSyncDevice.hpp"
 #include "kompute/operations/OpTensorSyncLocal.hpp"
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -697,6 +697,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920;
 }
 #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP

+#include <set>
 #include <unordered_map>

 #define KP_MAX_DIM_SIZE 1
@ -723,7 +724,7 @@ class Tensor
    enum class TensorTypes
    {
        eDevice = 0,  ///< Type is device memory, source and destination
-        eHost = 1, ///< Type is host memory, source and destination
+        eHost = 1,    ///< Type is host memory, source and destination
        eStorage = 2, ///< Type is Device memory (only)
    };

@ -736,7 +737,8 @@ class Tensor
     *  Default constructor with data provided which would be used to create the
     * respective vulkan buffer and memory.
     *
-     *  @param data Non-zero-sized vector of data that will be used by the tensor
+     *  @param data Non-zero-sized vector of data that will be used by the
+     * tensor
     *  @param tensorType Type for the tensor which is of type TensorTypes
     */
    Tensor(const std::vector<float>& data,
@ -829,24 +831,30 @@ class Tensor
                        bool createBarrier);

    /**
-     * Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
+     * Records a copy from the internal staging memory to the device memory
+     * using an optional barrier to wait for the operation. This function would
+     * only be relevant for kp::Tensors of type eDevice.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     * @param createBarrier Whether to create a barrier that ensures the data is
     * copied before further operations. Default is true.
     */
-    void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                        bool createBarrier);
+    void recordCopyFromStagingToDevice(
+      std::shared_ptr<vk::CommandBuffer> commandBuffer,
+      bool createBarrier);

    /**
-     * Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
+     * Records a copy from the internal device memory to the staging memory
+     * using an optional barrier to wait for the operation. This function would
+     * only be relevant for kp::Tensors of type eDevice.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     * @param createBarrier Whether to create a barrier that ensures the data is
     * copied before further operations. Default is true.
     */
-    void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                        bool createBarrier);
+    void recordCopyFromDeviceToStaging(
+      std::shared_ptr<vk::CommandBuffer> commandBuffer,
+      bool createBarrier);

    /**
     * Records the buffer memory barrier into the command buffer which
@ -908,9 +916,17 @@ class Tensor
    bool mIsInit = false;

    void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
-    void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
-    void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
-    void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
+    void createBuffer(std::shared_ptr<vk::Buffer> buffer,
+                      vk::BufferUsageFlags bufferUsageFlags);
+    void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
+                            std::shared_ptr<vk::DeviceMemory> memory,
+                            vk::MemoryPropertyFlags memoryPropertyFlags);
+    void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
+                    std::shared_ptr<vk::Buffer> bufferFrom,
+                    std::shared_ptr<vk::Buffer> bufferTo,
+                    vk::DeviceSize bufferSize,
+                    vk::BufferCopy copyRegion,
+                    bool createBarrier);

    // Private util functions
    vk::BufferUsageFlags getPrimaryBufferUsageFlags();
@ -949,13 +965,11 @@ class OpBase
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that are to be used in this operation
-     * @param freeTensors Whether operation manages the memory of the Tensors
     */
    OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors,
-           bool freeTensors)
+           std::vector<std::shared_ptr<Tensor>>& tensors)
    {
        SPDLOG_DEBUG("Compute OpBase constructor with params");

@ -963,14 +977,12 @@ class OpBase
        this->mDevice = device;
        this->mCommandBuffer = commandBuffer;
        this->mTensors = tensors;
-        this->mFreeTensors = freeTensors;
    }

    /**
     * Default destructor for OpBase class. This OpBase destructor class should
     * always be called to destroy and free owned resources unless it is
-     * intended to destroy the resources in the parent class. This can be done
-     * by passing the mFreeTensors=false.
+     * intended to destroy the resources in the parent class.
     */
    virtual ~OpBase()
    {
@ -1234,50 +1246,38 @@ class Sequence
 namespace kp {

 /**
-    Operation that creates tensor and manages the memory of the components
-   created
+    Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
 */
-class OpTensorCreate : public OpBase
+class OpTensorSyncDevice : public OpBase
 {
  public:
-    OpTensorCreate();
+    OpTensorSyncDevice();

    /**
-     * Default constructor with parameters that provides the bare minimum
-     * requirements for the operations to be able to create and manage their
-     * sub-components.
+     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
     *
     * @param physicalDevice Vulkan physical device used to find device queues
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that will be used to create in operation.
-     * @param freeTensors Whether operation manages the memory of the Tensors
     */
-    OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+    OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device,
                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
                   std::vector<std::shared_ptr<Tensor>> tensors);

    /**
-     * Default destructor which in this case expects the parent class to free
-     * the tensors
+     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
     */
-    ~OpTensorCreate() override;
+    ~OpTensorSyncDevice() override;

    /**
-     * In charge of initialising the primary Tensor as well as the staging
-     * tensor as required. It will only initialise a staging tensor if the
-     * Primary tensor is of type Device. For staging tensors it performs a 
-     * mapDataIntoHostMemory which would perform immediately as opposed to 
-     * on sequence eval/submission.
+     * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
     */
    void init() override;

    /**
-     * Record runs the core actions to create the tensors. For device tensors
-     * it records a copyCommand to move the data from the staging tensor to the 
-     * device tensor. The mapping for staging tensors happens in the init function
-     * not in the record function.
+     * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
     */
    void record() override;

@ -1287,8 +1287,7 @@ class OpTensorCreate : public OpBase
    virtual void preEval() override;

    /**
-     * Performs a copy back into the main tensor to ensure that the data
-     * contained is the one that is now being stored in the GPU.
+     * Does not perform any postEval commands.
     */
    virtual void postEval() override;

@ -1352,23 +1351,12 @@ class Manager
     *
     * @param sequenceName The name for the named sequence to be retrieved or
     * created
+     * @param queueIndex The queue to use from the available queues
     * @return Shared pointer to the manager owned sequence resource
     */
-    std::shared_ptr<Sequence> getOrCreateManagedSequence(
-      std::string sequenceName);
-
-    /**
-     * Create a new managed Kompute sequence so it's available within the
-     * manager.
-     *
-     * @param sequenceName The name for the named sequence to be created, if
-     * empty then default indexed value is used
-     * @param queueIndex The queue to use from the available queues
-     * @return Weak pointer to the manager owned sequence resource
-     */
-    std::shared_ptr<Sequence> createManagedSequence(
-      std::string sequenceName = "",
-      uint32_t queueIndex = 0);
+    std::shared_ptr<Sequence> sequence(
+            std::string sequenceName = KP_DEFAULT_SESSION,
+            uint32_t queueIndex = 0);

    /**
     * Function that evaluates operation against named sequence.
@ -1385,7 +1373,7 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOp triggered");
        std::shared_ptr<kp::Sequence> sq =
-          this->getOrCreateManagedSequence(sequenceName);
+          this->sequence(sequenceName);

        SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
        sq->begin();
@ -1415,10 +1403,8 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
        this->mCurrentSequenceIndex++;
-        this->evalOp<T>(tensors,
-                        KP_DEFAULT_SESSION +
-                          std::to_string(this->mCurrentSequenceIndex),
-                        std::forward<TArgs>(params)...);
+        this->evalOp<T>(
+          tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
    }

    /**
@ -1437,7 +1423,7 @@ class Manager
        SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");

        std::shared_ptr<kp::Sequence> sq =
-          this->getOrCreateManagedSequence(sequenceName);
+          this->sequence(sequenceName);

        SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
        sq->begin();
@ -1468,10 +1454,8 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
        this->mCurrentSequenceIndex++;
-        this->evalOpAsync<T>(tensors,
-                             KP_DEFAULT_SESSION +
-                               std::to_string(this->mCurrentSequenceIndex),
-                             std::forward<TArgs>(params)...);
+        this->evalOpAsync<T>(
+          tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
    }

    /**
@ -1512,36 +1496,98 @@ class Manager
    void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
    {
        SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
-        this->evalOpAwait(KP_DEFAULT_SESSION +
-                            std::to_string(this->mCurrentSequenceIndex),
-                          waitFor);
+        this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
    }

    /**
     * Function that simplifies the common workflow of tensor creation and
     * initialization. It will take the constructor parameters for a Tensor
-     * and will will us it to create a new Tensor and then create it using
-     * the OpCreateTensor command.
+     * and will use them to create a new Tensor and then initialise it. The
+     * tensor memory will then be managed and owned by the manager.
     *
     * @param data The data to initialize the tensor with
     * @param tensorType The type of tensor to initialize
+     * @param syncDataToGPU Whether to sync the data to GPU memory
     * @returns Initialized Tensor with memory Syncd to GPU device
     */
-    std::shared_ptr<Tensor> buildTensor(
+    std::shared_ptr<Tensor> tensor(
      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
+      bool syncDataToGPU = true)
    {
-        SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
+        SPDLOG_DEBUG("Kompute Manager tensor triggered");

        SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
        std::shared_ptr<Tensor> tensor =
          std::make_shared<Tensor>(kp::Tensor(data, tensorType));

-        this->evalOpDefault<OpTensorCreate>({ tensor });
+        tensor->init(this->mPhysicalDevice, this->mDevice);
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+        }
+        this->mManagedTensors.insert(tensor);

        return tensor;
    }

+    /**
+     * Function that simplifies the common workflow of tensor initialisation. It
+     * takes already constructed Tensors and rebuilds each of them, syncing them
+     * all at once if requested. The tensor memory will then be managed and
+     * owned by the manager.
+     *
+     * @param tensors Array of tensors to rebuild
+     * @param syncDataToGPU Whether to sync the data to GPU memory
+     * @returns Nothing; the tensors passed in are rebuilt in place
+     */
+    void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
+                        bool syncDataToGPU = true)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild triggered");
+        for (std::shared_ptr<Tensor> tensor : tensors) {
+
+            // False syncData to run all tensors at once instead one by one
+            this->rebuild(tensor, false);
+        }
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>(tensors);
+        }
+    }
+
+    /**
+     * Function that simplifies the common workflow of tensor initialisation. It
+     * takes an already constructed Tensor, frees its GPU resources if it is
+     * already initialised, and initialises it again. The tensor memory will
+     * then be managed and owned by the manager.
+     *
+     * @param tensor Single tensor to rebuild
+     * @param syncDataToGPU Whether to sync the data to GPU memory
+     * @returns Nothing; the tensor passed in is rebuilt in place
+     */
+    void rebuild(std::shared_ptr<kp::Tensor> tensor,
+                        bool syncDataToGPU = true)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+
+        if (tensor->isInit()) {
+            tensor->freeMemoryDestroyGPUResources();
+        }
+
+        tensor->init(this->mPhysicalDevice, this->mDevice);
+
+        std::set<std::shared_ptr<Tensor>>::iterator it =
+          this->mManagedTensors.find(tensor);
+        if (it == this->mManagedTensors.end()) {
+            this->mManagedTensors.insert(tensor);
+        }
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+        }
+    }
+
  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::Instance> mInstance = nullptr;
@ -1552,6 +1598,8 @@ class Manager
    bool mFreeDevice = false;

    // -------------- ALWAYS OWNED RESOURCES
+    std::set<std::shared_ptr<Tensor>> mManagedTensors;
+
    std::unordered_map<std::string, std::shared_ptr<Sequence>>
      mManagedSequences;

@ -1999,59 +2047,6 @@ class OpTensorCopy : public OpBase

 namespace kp {

-/**
-    Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
-*/
-class OpTensorSyncDevice : public OpBase
-{
-  public:
-    OpTensorSyncDevice();
-
-    /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage.
-     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
-     * @param tensors Tensors that will be used to create in operation.
-     */
-    OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-                   std::shared_ptr<vk::Device> device,
-                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                   std::vector<std::shared_ptr<Tensor>> tensors);
-
-    /**
-     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
-     */
-    ~OpTensorSyncDevice() override;
-
-    /**
-     * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
-     */
-    void init() override;
-
-    /**
-     * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
-     */
-    void record() override;
-
-    /**
-     * Does not perform any preEval commands.
-     */
-    virtual void preEval() override;
-
-    /**
-     * Does not perform any postEval commands.
-     */
-    virtual void postEval() override;
-
-  private:
-};
-
-} // End namespace kp
-
-namespace kp {
-
 /**
    Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
 */
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@ -68,6 +68,18 @@ Manager::~Manager()
        this->mManagedSequences.clear();
    }

+    if (this->mManagedTensors.size()) {
+        SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors");
+        for (const std::shared_ptr<Tensor>& tensor : this->mManagedTensors) {
+            if (!tensor->isInit()) {
+                SPDLOG_ERROR("Kompute Manager attempted to free managed tensor "
+                             "but not tensor is not initialised");
+            }
+            tensor->freeMemoryDestroyGPUResources();
+        }
+        this->mManagedTensors.clear();
+    }
+
    if (this->mFreeDevice) {
        SPDLOG_INFO("Destroying device");
        this->mDevice->destroy(
@ -99,48 +111,34 @@ Manager::~Manager()
 }

 std::shared_ptr<Sequence>
-Manager::getOrCreateManagedSequence(std::string sequenceName)
+Manager::sequence(std::string sequenceName, uint32_t queueIndex)
 {
-    SPDLOG_DEBUG("Kompute Manager creating Sequence object");
+    SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} "
+                 "and queueIndex: {}",
+                 sequenceName,
+                 queueIndex);
+
+    std::shared_ptr<Sequence> sq = nullptr;

    std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
      this->mManagedSequences.find(sequenceName);

    if (found == this->mManagedSequences.end()) {
-        return this->createManagedSequence(sequenceName);
+        std::shared_ptr<Sequence> sq =
+          std::make_shared<Sequence>(this->mPhysicalDevice,
+                                     this->mDevice,
+                                     this->mComputeQueues[queueIndex],
+                                     this->mComputeQueueFamilyIndices[queueIndex]);
+        sq->init();
+
+        this->mManagedSequences.insert({ sequenceName, sq });
+
+        return sq;
    } else {
        return found->second;
    }
 }

-std::shared_ptr<Sequence>
-Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex)
-{
-
-    SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} "
-                 "and queueIndex: {}",
-                 sequenceName,
-                 queueIndex);
-
-    std::shared_ptr<Sequence> sq =
-      std::make_shared<Sequence>(this->mPhysicalDevice,
-                                 this->mDevice,
-                                 this->mComputeQueues[queueIndex],
-                                 this->mComputeQueueFamilyIndices[queueIndex]);
-    sq->init();
-
-    if (sequenceName.empty()) {
-        this->mCurrentSequenceIndex++;
-        this->mManagedSequences.insert(
-          { KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex),
-            sq });
-    } else {
-        // TODO: Check if sequence doesn't already exist
-        this->mManagedSequences.insert({ sequenceName, sq });
-    }
-    return sq;
-}
-
 void
 Manager::createInstance()
 {
--- a/src/OpAlgoBase.cpp
+++ b/src/OpAlgoBase.cpp
@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                       std::shared_ptr<vk::CommandBuffer> commandBuffer,
                       std::vector<std::shared_ptr<Tensor>>& tensors,
                       KomputeWorkgroup komputeWorkgroup)
-  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+  : OpBase(physicalDevice, device, commandBuffer, tensors)
 {
    SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}",
                 tensors.size());
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                           std::shared_ptr<vk::Device> device,
                           std::shared_ptr<vk::CommandBuffer> commandBuffer,
                           std::vector<std::shared_ptr<Tensor>> tensors)
-  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+  : OpBase(physicalDevice, device, commandBuffer, tensors)
 {
    SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params");
 }
--- a/src/OpTensorCreate.cpp
+++ b/src/OpTensorCreate.cpp
@ -1,76 +0,0 @@
-
-#include "kompute/Tensor.hpp"
-
-#include "kompute/operations/OpTensorCreate.hpp"
-
-namespace kp {
-
-OpTensorCreate::OpTensorCreate()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate constructor base");
-}
-
-OpTensorCreate::OpTensorCreate(
-  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-  std::shared_ptr<vk::Device> device,
-  std::shared_ptr<vk::CommandBuffer> commandBuffer,
-  std::vector<std::shared_ptr<Tensor>> tensors)
-  : OpBase(physicalDevice, device, commandBuffer, tensors, true)
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params");
-}
-
-OpTensorCreate::~OpTensorCreate()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
-}
-
-void
-OpTensorCreate::init()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate init called");
-
-    if (this->mTensors.size() < 1) {
-        throw std::runtime_error(
-          "Kompute OpTensorCreate called with less than 1 tensor");
-    }
-
-    for (std::shared_ptr<Tensor> tensor : this->mTensors) {
-        if (tensor->isInit()) {
-            throw std::runtime_error(
-              "Kompute OpTensorCreate: Tensor has already been initialized");
-        }
-        if (tensor->tensorType() != Tensor::TensorTypes::eStorage) {
-            tensor->init(this->mPhysicalDevice, this->mDevice);
-
-            tensor->mapDataIntoHostMemory();
-        }
-    }
-}
-
-void
-OpTensorCreate::record()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate record called");
-
-    for (size_t i = 0; i < this->mTensors.size(); i++) {
-        if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
-            this->mTensors[i]->recordCopyFromStagingToDevice(
-              this->mCommandBuffer, false);
-        }
-    }
-}
-
-void
-OpTensorCreate::preEval()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate preEval called");
-}
-
-void
-OpTensorCreate::postEval()
-{
-    SPDLOG_DEBUG("Kompute OpTensorCreate postEval called");
-}
-
-}
--- a/src/OpTensorSyncDevice.cpp
+++ b/src/OpTensorSyncDevice.cpp
@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice(
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  std::vector<std::shared_ptr<Tensor>> tensors)
-  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+  : OpBase(physicalDevice, device, commandBuffer, tensors)
 {
    SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
 }
--- a/src/OpTensorSyncLocal.cpp
+++ b/src/OpTensorSyncLocal.cpp
@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal(
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  std::vector<std::shared_ptr<Tensor>> tensors)
-  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
+  : OpBase(physicalDevice, device, commandBuffer, tensors)
 {
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
 }
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
    this->mDevice = device;
    this->mComputeQueue = computeQueue;
    this->mQueueIndex = queueIndex;
-    this->mIsInit = true;
+    this->mIsInit = false;
 }

 Sequence::~Sequence()
@ -203,6 +203,8 @@ Sequence::isInit()
 void
 Sequence::freeMemoryDestroyGPUResources()
 {
+    SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
+
    if (!this->mIsInit) {
        SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called "
                     "but Sequence is not initialized so there's no relevant "
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@ -229,8 +229,12 @@ Tensor::mapDataFromHostMemory()

    if (this->mTensorType == TensorTypes::eHost) {
        hostVisibleMemory = this->mPrimaryMemory;
-    } else {
+    } else if (this->mTensorType == TensorTypes::eDevice) {
        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        SPDLOG_WARN(
+          "Kompute Tensor mapping data not supported on storage tensor");
+        return;
    }

    vk::DeviceSize bufferSize = this->memorySize();
@ -252,8 +256,12 @@ Tensor::mapDataIntoHostMemory()

    if (this->mTensorType == TensorTypes::eHost) {
        hostVisibleMemory = this->mPrimaryMemory;
-    } else {
+    } else if (this->mTensorType == TensorTypes::eDevice) {
        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        SPDLOG_WARN(
+          "Kompute Tensor mapping data not supported on storage tensor");
+        return;
    }

    vk::DeviceSize bufferSize = this->memorySize();
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@ -1,12 +1,13 @@
 #pragma once

+#include <set>
 #include <unordered_map>

 #include "kompute/Core.hpp"

 #include "kompute/Sequence.hpp"

-#include "kompute/operations/OpTensorCreate.hpp"
+#include "kompute/operations/OpTensorSyncDevice.hpp"

 #define KP_DEFAULT_SESSION "DEFAULT"

@ -63,23 +64,12 @@ class Manager
     *
     * @param sequenceName The name for the named sequence to be retrieved or
     * created
+     * @param queueIndex The queue to use from the available queues
     * @return Shared pointer to the manager owned sequence resource
     */
-    std::shared_ptr<Sequence> getOrCreateManagedSequence(
-      std::string sequenceName);
-
-    /**
-     * Create a new managed Kompute sequence so it's available within the
-     * manager.
-     *
-     * @param sequenceName The name for the named sequence to be created, if
-     * empty then default indexed value is used
-     * @param queueIndex The queue to use from the available queues
-     * @return Weak pointer to the manager owned sequence resource
-     */
-    std::shared_ptr<Sequence> createManagedSequence(
-      std::string sequenceName = "",
-      uint32_t queueIndex = 0);
+    std::shared_ptr<Sequence> sequence(
+            std::string sequenceName = KP_DEFAULT_SESSION,
+            uint32_t queueIndex = 0);

    /**
     * Function that evaluates operation against named sequence.
@ -96,7 +86,7 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOp triggered");
        std::shared_ptr<kp::Sequence> sq =
-          this->getOrCreateManagedSequence(sequenceName);
+          this->sequence(sequenceName);

        SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
        sq->begin();
@ -126,10 +116,8 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOp Default triggered");
        this->mCurrentSequenceIndex++;
-        this->evalOp<T>(tensors,
-                        KP_DEFAULT_SESSION +
-                          std::to_string(this->mCurrentSequenceIndex),
-                        std::forward<TArgs>(params)...);
+        this->evalOp<T>(
+          tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
    }

    /**
@ -148,7 +136,7 @@ class Manager
        SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered");

        std::shared_ptr<kp::Sequence> sq =
-          this->getOrCreateManagedSequence(sequenceName);
+          this->sequence(sequenceName);

        SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
        sq->begin();
@ -179,10 +167,8 @@ class Manager
    {
        SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered");
        this->mCurrentSequenceIndex++;
-        this->evalOpAsync<T>(tensors,
-                             KP_DEFAULT_SESSION +
-                               std::to_string(this->mCurrentSequenceIndex),
-                             std::forward<TArgs>(params)...);
+        this->evalOpAsync<T>(
+          tensors, KP_DEFAULT_SESSION, std::forward<TArgs>(params)...);
    }

    /**
@ -223,36 +209,98 @@ class Manager
    void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX)
    {
        SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered");
-        this->evalOpAwait(KP_DEFAULT_SESSION +
-                            std::to_string(this->mCurrentSequenceIndex),
-                          waitFor);
+        this->evalOpAwait(KP_DEFAULT_SESSION, waitFor);
    }

    /**
     * Function that simplifies the common workflow of tensor creation and
     * initialization. It will take the constructor parameters for a Tensor
-     * and will will us it to create a new Tensor and then create it using
-     * the OpCreateTensor command.
+     * and will use them to create a new Tensor and then initialise it. The
+     * tensor memory will then be managed and owned by the manager.
     *
     * @param data The data to initialize the tensor with
     * @param tensorType The type of tensor to initialize
+     * @param syncDataToGPU Whether to sync the data to GPU memory
     * @returns Initialized Tensor with memory Syncd to GPU device
     */
-    std::shared_ptr<Tensor> buildTensor(
+    std::shared_ptr<Tensor> tensor(
      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
+      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice,
+      bool syncDataToGPU = true)
    {
-        SPDLOG_DEBUG("Kompute Manager createInitTensor triggered");
+        SPDLOG_DEBUG("Kompute Manager tensor triggered");

        SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr");
        std::shared_ptr<Tensor> tensor =
          std::make_shared<Tensor>(kp::Tensor(data, tensorType));

-        this->evalOpDefault<OpTensorCreate>({ tensor });
+        tensor->init(this->mPhysicalDevice, this->mDevice);
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+        }
+        this->mManagedTensors.insert(tensor);

        return tensor;
    }

+    /**
+     * Function that simplifies the common workflow of tensor initialisation. It
+     * takes already constructed Tensors and rebuilds each of them, syncing them
+     * all at once if requested. The tensor memory will then be managed and
+     * owned by the manager.
+     *
+     * @param tensors Array of tensors to rebuild
+     * @param syncDataToGPU Whether to sync the data to GPU memory
+     * @returns Nothing; the tensors passed in are rebuilt in place
+     */
+    void rebuild(std::vector<std::shared_ptr<kp::Tensor>> tensors,
+                        bool syncDataToGPU = true)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild triggered");
+        for (std::shared_ptr<Tensor> tensor : tensors) {
+
+            // False syncData to run all tensors at once instead one by one
+            this->rebuild(tensor, false);
+        }
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>(tensors);
+        }
+    }
+
+    /**
+     * Function that simplifies the common workflow of tensor initialisation. It
+     * takes an already constructed Tensor, frees its GPU resources if it is
+     * already initialised, and initialises it again. The tensor memory will
+     * then be managed and owned by the manager.
+     *
+     * @param tensor Single tensor to rebuild
+     * @param syncDataToGPU Whether to sync the data to GPU memory
+     * @returns Nothing; the tensor passed in is rebuilt in place
+     */
+    void rebuild(std::shared_ptr<kp::Tensor> tensor,
+                        bool syncDataToGPU = true)
+    {
+        SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered");
+
+        if (tensor->isInit()) {
+            tensor->freeMemoryDestroyGPUResources();
+        }
+
+        tensor->init(this->mPhysicalDevice, this->mDevice);
+
+        std::set<std::shared_ptr<Tensor>>::iterator it =
+          this->mManagedTensors.find(tensor);
+        if (it == this->mManagedTensors.end()) {
+            this->mManagedTensors.insert(tensor);
+        }
+
+        if (syncDataToGPU) {
+            this->evalOpDefault<OpTensorSyncDevice>({ tensor });
+        }
+    }
+
  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::Instance> mInstance = nullptr;
@ -263,6 +311,8 @@ class Manager
    bool mFreeDevice = false;

    // -------------- ALWAYS OWNED RESOURCES
+    std::set<std::shared_ptr<Tensor>> mManagedTensors;
+
    std::unordered_map<std::string, std::shared_ptr<Sequence>>
      mManagedSequences;

--- a/src/include/kompute/operations/OpBase.hpp
+++ b/src/include/kompute/operations/OpBase.hpp
@ -31,13 +31,11 @@ class OpBase
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that are to be used in this operation
-     * @param freeTensors Whether operation manages the memory of the Tensors
     */
    OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
-           std::vector<std::shared_ptr<Tensor>>& tensors,
-           bool freeTensors)
+           std::vector<std::shared_ptr<Tensor>>& tensors)
    {
        SPDLOG_DEBUG("Compute OpBase constructor with params");

@ -45,14 +43,12 @@ class OpBase
        this->mDevice = device;
        this->mCommandBuffer = commandBuffer;
        this->mTensors = tensors;
-        this->mFreeTensors = freeTensors;
    }

    /**
     * Default destructor for OpBase class. This OpBase destructor class should
     * always be called to destroy and free owned resources unless it is
-     * intended to destroy the resources in the parent class. This can be done
-     * by passing the mFreeTensors=false.
+     * intended to destroy the resources in the parent class.
     */
    virtual ~OpBase()
    {
--- a/src/include/kompute/operations/OpTensorCreate.hpp
+++ b/src/include/kompute/operations/OpTensorCreate.hpp
@ -1,74 +0,0 @@
-#pragma once
-
-#include "kompute/Core.hpp"
-
-#include "kompute/Tensor.hpp"
-
-#include "kompute/operations/OpBase.hpp"
-
-namespace kp {
-
-/**
-    Operation that creates tensor and manages the memory of the components
-   created
-*/
-class OpTensorCreate : public OpBase
-{
-  public:
-    OpTensorCreate();
-
-    /**
-     * Default constructor with parameters that provides the bare minimum
-     * requirements for the operations to be able to create and manage their
-     * sub-components.
-     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
-     * @param tensors Tensors that will be used to create in operation.
-     * @param freeTensors Whether operation manages the memory of the Tensors
-     */
-    OpTensorCreate(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-                   std::shared_ptr<vk::Device> device,
-                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
-                   std::vector<std::shared_ptr<Tensor>> tensors);
-
-    /**
-     * Default destructor which in this case expects the parent class to free
-     * the tensors
-     */
-    ~OpTensorCreate() override;
-
-    /**
-     * In charge of initialising the primary Tensor as well as the staging
-     * tensor as required. It will only initialise a staging tensor if the
-     * Primary tensor is of type Device. For staging tensors it performs a 
-     * mapDataIntoHostMemory which would perform immediately as opposed to 
-     * on sequence eval/submission.
-     */
-    void init() override;
-
-    /**
-     * Record runs the core actions to create the tensors. For device tensors
-     * it records a copyCommand to move the data from the staging tensor to the 
-     * device tensor. The mapping for staging tensors happens in the init function
-     * not in the record function.
-     */
-    void record() override;
-
-    /**
-     * Does not perform any preEval commands.
-     */
-    virtual void preEval() override;
-
-    /**
-     * Performs a copy back into the main tensor to ensure that the data
-     * contained is the one that is now being stored in the GPU.
-     */
-    virtual void postEval() override;
-
-
-  private:
-};
-
-} // End namespace kp
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
        inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
    }

-    mgr.evalOpDefault<kp::OpTensorCreate>(inputsSyncB);
+    mgr.rebuild(inputsSyncB);

    auto startSync = std::chrono::high_resolution_clock::now();

@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
        inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
    }

-    mgrAsync.evalOpDefault<kp::OpTensorCreate>(inputsAsyncB);
+    mgrAsync.rebuild(inputsAsyncB);

    for (uint32_t i = 0; i < numParallel; i++) {
-        mgrAsync.createManagedSequence("async" + std::to_string(i), i);
+        mgrAsync.sequence("async" + std::to_string(i), i);
    }

    auto startAsync = std::chrono::high_resolution_clock::now();
@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };

-    mgr.createManagedSequence("asyncOne");
-    mgr.createManagedSequence("asyncTwo");
+    mgr.sequence("asyncOne");
+    mgr.sequence("asyncTwo");

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    mgr.evalOpAsync<kp::OpAlgoBase>(
      { tensorA }, "asyncOne", std::vector<char>(shader.begin(), shader.end()));
--- a/test/TestLogisticRegression.cpp
+++ b/test/TestLogisticRegression.cpp
@ -32,14 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression)
    {
        kp::Manager mgr;

-        std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
+        mgr.rebuild(params);

-        sqTensor->begin();
-        sqTensor->record<kp::OpTensorCreate>(params);
-        sqTensor->end();
-        sqTensor->eval();
-
-        std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+        std::shared_ptr<kp::Sequence> sq = mgr.sequence();

        // Record op algo base
        sq->begin();
@ -122,14 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
    {
        kp::Manager mgr;

-        std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
+        mgr.rebuild(params);

-        sqTensor->begin();
-        sqTensor->record<kp::OpTensorCreate>(params);
-        sqTensor->end();
-        sqTensor->eval();
-
-        std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+        std::shared_ptr<kp::Sequence> sq = mgr.sequence();

        // Record op algo base
        sq->begin();
--- a/test/TestManager.cpp
+++ b/test/TestManager.cpp
@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow)
    kp::Manager mgr;

    std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorLHS });
+    mgr.rebuild({ tensorLHS });

    std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor({ 2, 4, 6 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorRHS });
+    mgr.rebuild({ tensorRHS });

    std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor({ 0, 0, 0 }) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorOutput });
+    mgr.rebuild({ tensorOutput });

    mgr.evalOpDefault<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });

@ -36,15 +36,13 @@ TEST(TestManager, OpMultSequenceFlow)
    kp::Manager mgr;

    {
+        mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });
+
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence");
+          mgr.sequence("newSequence");

        sq->begin();

-        sq->record<kp::OpTensorCreate>({ tensorLHS });
-        sq->record<kp::OpTensorCreate>({ tensorRHS });
-        sq->record<kp::OpTensorCreate>({ tensorOutput });
-
        sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });

        sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
@ -61,16 +59,16 @@ TEST(TestManager, TestMultipleSequences)
    kp::Manager mgr;

    std::shared_ptr<kp::Sequence> sqOne =
-      mgr.getOrCreateManagedSequence("sqOne");
+      mgr.sequence("sqOne");

    std::shared_ptr<kp::Sequence> sqTwo =
-      mgr.getOrCreateManagedSequence("sqTwo");
+      mgr.sequence("sqTwo");

    std::shared_ptr<kp::Sequence> sqOneRef =
-      mgr.getOrCreateManagedSequence("sqOne");
+      mgr.sequence("sqOne");

    std::shared_ptr<kp::Sequence> sqTwoRef =
-      mgr.getOrCreateManagedSequence("sqTwo");
+      mgr.sequence("sqTwo");

    EXPECT_EQ(sqOne, sqOneRef);
    EXPECT_NE(sqTwo, sqOneRef);
@ -90,17 +88,17 @@ TEST(TestManager, TestMultipleTensorsAtOnce)
    kp::Manager mgr;

    std::shared_ptr<kp::Sequence> sq =
-      mgr.getOrCreateManagedSequence("newSequence");
+      mgr.sequence("newSequence");

    {
-        sq->begin();
-
-        sq->record<kp::OpTensorCreate>({ tensorLHS, tensorRHS, tensorOutput });
+        mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput });

        EXPECT_TRUE(tensorLHS->isInit());
        EXPECT_TRUE(tensorRHS->isInit());
        EXPECT_TRUE(tensorOutput->isInit());

+        sq->begin();
+
        sq->record<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });

        sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
@ -116,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor)
 {
    kp::Manager mgr;

-    std::shared_ptr<kp::Tensor> tensorA = mgr.buildTensor({ 0, 1, 2 });
-    std::shared_ptr<kp::Tensor> tensorB = mgr.buildTensor({ 0, 0, 0 });
+    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 1, 2 });
+    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });

    mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });

@ -126,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor)
    EXPECT_EQ(tensorB->data(), std::vector<float>({ 0, 1, 2 }));

    std::shared_ptr<kp::Tensor> tensorC =
-      mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);

    mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorC });

--- a/test/TestMultipleAlgoExecutions.cpp
+++ b/test/TestMultipleAlgoExecutions.cpp
@ -19,14 +19,14 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
          pa[index] = pa[index] + 1;
      })");

+    mgr.rebuild({ tensorA });
+
    std::shared_ptr<kp::Sequence> sq =
-      mgr.getOrCreateManagedSequence("newSequence");
+      mgr.sequence("newSequence");

    {
        sq->begin();

-        sq->record<kp::OpTensorCreate>({ tensorA });
-
        sq->record<kp::OpAlgoBase>(
          { tensorA }, std::vector<char>(shader.begin(), shader.end()));
        sq->record<kp::OpAlgoBase>(
@ -58,13 +58,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
          pa[index] = pa[index] + 1;
      })");

-    std::shared_ptr<kp::Sequence> sqTensor = mgr.createManagedSequence();
+    mgr.rebuild({ tensorA }, false);

-    std::shared_ptr<kp::Sequence> sq = mgr.createManagedSequence();
+    std::shared_ptr<kp::Sequence> sqTensor = mgr.sequence();
+
+    std::shared_ptr<kp::Sequence> sq = mgr.sequence();

-    // First create the tensor in a separate sequence
+    // First sync the tensor to the device in a separate sequence
    sqTensor->begin();
-    sqTensor->record<kp::OpTensorCreate>({ tensorA });
+    sqTensor->record<kp::OpTensorSyncDevice>({ tensorA });
    sqTensor->end();
    sqTensor->eval();

@ -111,24 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
          pa[index] = pa[index] + 1;
      })");

-    {
-        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence");
-
-        sq->begin();
-
-        sq->record<kp::OpTensorCreate>({ tensorA });
-
-        sq->record<kp::OpAlgoBase>(
-          { tensorA }, std::vector<char>(shader.begin(), shader.end()));
-
-        sq->end();
-        sq->eval();
-    }
+    mgr.rebuild({ tensorA });

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence2");
+          mgr.sequence("newSequence");

        sq->begin();

@ -141,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence3");
+          mgr.sequence("newSequence2");

        sq->begin();

@ -154,7 +143,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence5");
+          mgr.sequence("newSequence3");
+
+        sq->begin();
+
+        sq->record<kp::OpAlgoBase>(
+          { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+
+        sq->end();
+        sq->eval();
+    }
+
+    {
+        std::shared_ptr<kp::Sequence> sq =
+          mgr.sequence("newSequence5");

        sq->begin();

@ -183,13 +185,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
          pa[index] = pa[index] + 1;
      })");

+    mgr.rebuild({ tensorA }, false);
+
    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence");
+          mgr.sequence("newSequence");

        sq->begin();

-        sq->record<kp::OpTensorCreate>({ tensorA });
+        sq->record<kp::OpTensorSyncDevice>({ tensorA });

        sq->end();
        sq->eval();
@ -197,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence2");
+          mgr.sequence("newSequence2");

        sq->begin();

@ -213,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence3");
+          mgr.sequence("newSequence3");

        sq->begin();

@ -238,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate)
    std::shared_ptr<kp::Tensor> tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) };
    std::shared_ptr<kp::Tensor> tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorInA, tensorInB, tensorOut });
+    mgr.rebuild({ tensorInA, tensorInB, tensorOut });

    std::string shader(R"(
        // The version to use 
@ -273,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)

    kp::Manager mgr;

-    auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 });
-    auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 });
-    auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 });
+    auto tensorInA = mgr.tensor(
+      { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false);
+    auto tensorInB = mgr.tensor(
+      { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false);
+    auto tensorOut = mgr.tensor(
+      { 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false);

    std::string shader(R"(
        // The version to use 
@ -296,6 +303,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)
        }
      )");

+    mgr.evalOpDefault<kp::OpTensorSyncDevice>(
+      { tensorInA, tensorInB, tensorOut });
+
    mgr.evalOpDefault<kp::OpAlgoBase>(
      { tensorInA, tensorInB, tensorOut },
      std::vector<char>(shader.begin(), shader.end()));
@ -304,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate)

    EXPECT_EQ(tensorOut->data(), std::vector<float>({ 0.0, 4.0, 12.0 }));
 }
+
+TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
+{
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
+
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    {
+        std::shared_ptr<kp::Sequence> sq = nullptr;
+
+        {
+            kp::Manager mgr;
+
+            mgr.rebuild({ tensorA });
+
+            sq = mgr.sequence();
+
+            sq->begin();
+            sq->record<kp::OpAlgoBase>(
+              { tensorA }, std::vector<char>(shader.begin(), shader.end()));
+            sq->end();
+
+            sq->eval();
+
+            mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
+        }
+    }
+    EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
+}
--- a/test/TestOpAlgoLoopsPassingData.cpp
+++ b/test/TestOpAlgoLoopsPassingData.cpp
@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)
        }
    )");

+    mgr.rebuild({ tensorA, tensorB }, false);
+
    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("default");
+          mgr.sequence("default");

        sq->begin();

-        sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
+        sq->record<kp::OpTensorSyncDevice>({ tensorA, tensorB });

        sq->end();

@ -45,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("run");
+          mgr.sequence("run");

        sq->begin();

@ -63,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("export");
+          mgr.sequence("export");

        sq->begin();

--- a/test/TestOpShadersFromStringAndFile.cpp
+++ b/test/TestOpShadersFromStringAndFile.cpp
@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor)

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    std::string shader(R"(
        #version 450
@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor)

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    mgr.evalOpDefault<kp::OpAlgoBase>(
      { tensorA, tensorB },
@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile)

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    mgr.evalOpDefault<kp::OpAlgoBase>(
      { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp");
@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile)

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    mgr.evalOpDefault<kp::OpAlgoBase>(
      { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");
--- a/test/TestOpTensorCopy.cpp
+++ b/test/TestOpTensorCopy.cpp
@ -8,13 +8,13 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 1, 2, 3 };
    std::vector<float> testVecB{ 0, 0, 0 };

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 2, 3, 4 };
    std::vector<float> testVecB{ 0, 0, 0 };
    std::vector<float> testVecC{ 0, 0, 0 };

@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
    std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(testVecC) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
+    mgr.rebuild({ tensorA, tensorB, tensorC });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -63,14 +63,17 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 3, 4, 5 };
    std::vector<float> testVecB{ 0, 0, 0 };

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
      testVecB, kp::Tensor::TensorTypes::eHost) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB }, false);
+
+    // Only calling sync on the device-type tensor
+    mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorA });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -89,14 +92,20 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 4, 5, 6 };
    std::vector<float> testVecB{ 0, 0, 0 };

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
      testVecA, kp::Tensor::TensorTypes::eHost) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB }, false);
+
+    // Manually copy data into host memory of Tensor
+    tensorA->mapDataIntoHostMemory();
+
+    // Only calling sync on the device-type tensor
+    mgr.evalOpDefault<kp::OpTensorSyncDevice>({ tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -115,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 5, 6, 7 };
    std::vector<float> testVecB{ 0, 0, 0 };

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
@ -123,7 +132,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
      testVecB, kp::Tensor::TensorTypes::eHost) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -142,12 +151,12 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)

    kp::Manager mgr;

-    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecA{ 6, 7, 8 };

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
      testVecA, kp::Tensor::TensorTypes::eHost) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
+    mgr.rebuild({ tensorA }, false);

    EXPECT_TRUE(tensorA->isInit());

--- a/test/TestOpTensorCreate.cpp
+++ b/test/TestOpTensorCreate.cpp
@ -5,20 +5,19 @@

 TEST(TestOpTensorCreate, CreateSingleTensorSingleOp)
 {
-
-    kp::Manager mgr;
-
    std::vector<float> testVecA{ 9, 8, 7 };
-
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
+    {
+        kp::Manager mgr;

-    EXPECT_TRUE(tensorA->isInit());
+        mgr.rebuild({ tensorA });

-    EXPECT_EQ(tensorA->data(), testVecA);
+        EXPECT_TRUE(tensorA->isInit());
+
+        EXPECT_EQ(tensorA->data(), testVecA);
+    }

-    tensorA->freeMemoryDestroyGPUResources();
    EXPECT_FALSE(tensorA->isInit());
 }

@ -33,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp)
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
+    mgr.rebuild({ tensorA, tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -53,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
+    mgr.rebuild({ tensorA });
+    mgr.rebuild({ tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -63,7 +62,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp)
    EXPECT_EQ(tensorB->data(), testVecB);
 }

-TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
+TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed)
 {

    std::vector<float> testVecA{ 9, 8, 7 };
@ -74,8 +73,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)

    {
        kp::Manager mgr;
-        mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
-        mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
+        mgr.rebuild({ tensorA });
+        mgr.rebuild({ tensorB });

        EXPECT_TRUE(tensorA->isInit());
        EXPECT_TRUE(tensorB->isInit());
@ -88,6 +87,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed)
    EXPECT_FALSE(tensorB->isInit());
 }

+TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed)
+{
+
+    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecB{ 6, 5, 4 };
+
+    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
+    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
+
+    kp::Manager mgr;
+
+    {
+        mgr.rebuild({ tensorA });
+        mgr.rebuild({ tensorB });
+
+        EXPECT_TRUE(tensorA->isInit());
+        EXPECT_TRUE(tensorB->isInit());
+
+        EXPECT_EQ(tensorA->data(), testVecA);
+        EXPECT_EQ(tensorB->data(), testVecB);
+    }
+
+    EXPECT_TRUE(tensorA->isInit());
+    EXPECT_TRUE(tensorB->isInit());
+}
+
 TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
 {

@ -99,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)

    kp::Manager mgr;

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorB });
+    mgr.rebuild({ tensorA });
+    mgr.rebuild({ tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
@ -123,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
    kp::Manager mgr;

    try {
-        mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
+        mgr.rebuild({ tensorA });
    } catch (const std::runtime_error& err) {
        // check exception
        ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
--- a/test/TestOpTensorSync.cpp
+++ b/test/TestOpTensorSync.cpp
@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor)

    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecPreA) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
+    mgr.rebuild({ tensorA }, false);

    EXPECT_TRUE(tensorA->isInit());

@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
    std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor({ 0, 0, 0 }) };

-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC });
+    mgr.rebuild({ tensorA, tensorB, tensorC }, false);

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());
--- a/test/TestSequence.cpp
+++ b/test/TestSequence.cpp
@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd)

    {
        std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("newSequence");
+          mgr.sequence("newSequence");

        EXPECT_TRUE(sq->eval());
        EXPECT_TRUE(!sq->isRecording());
@ -32,10 +32,11 @@ TEST(TestSequence, SequenceDestructorViaManager)
    {
        kp::Manager mgr;

-        sq = mgr.getOrCreateManagedSequence("newSequence");
+        sq = mgr.sequence("newSequence");

        EXPECT_TRUE(sq->isInit());
    }

    EXPECT_FALSE(sq->isInit());
 }
+
--- a/test/TestTensor.cpp
+++ b/test/TestTensor.cpp
@ -23,11 +23,11 @@ TEST(TestTensor, CopyFromHostData)

    kp::Manager mgr;

-    if (std::shared_ptr<kp::Sequence> sq =
-          mgr.getOrCreateManagedSequence("new")) {
-        sq->begin();
+    mgr.rebuild({ tensorA, tensorB });

-        sq->record<kp::OpTensorCreate>({ tensorA, tensorB });
+    if (std::shared_ptr<kp::Sequence> sq =
+          mgr.sequence("new")) {
+        sq->begin();

        sq->record<kp::OpTensorCopy>({ tensorA, tensorB });