Initial checkpoint with reasonable workflow

2021-02-24 08:39:09 +00:00 · 2021-02-24 08:39:09 +00:00 · 9aae5d69db
commit 9aae5d69db
parent 5db9abd06e
46 changed files with 1158 additions and 695 deletions
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@ -266,23 +266,23 @@ The type of tensor to initialize @param syncDataToGPU Whether to sync
 the data to GPU memory @returns Initialized Tensor with memory Syncd
 to GPU device)doc";

-static const char *__doc_kp_OpAlgoBase =
+static const char *__doc_kp_OpAlgoCreate =
 R"doc(Operation that provides a general abstraction that simplifies the use
 of algorithm and parameter components which can be used with shaders.
 By default it enables the user to provide a dynamic number of tensors
 which are then passed as inputs.)doc";

-static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_x = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_y = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_z = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc";
+static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate = R"doc(Base constructor, should not be used unless explicitly intended.)doc";

-static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 =
+static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_2 =
 R"doc(Default constructor with parameters that provides the bare minimum
 requirements for the operations to be able to create and manage their
 sub-components.
@ -295,7 +295,7 @@ shaderFilePath Optional parameter to specify the shader to load
 (either in spirv or raw format) @param komputeWorkgroup Optional
 parameter to specify the layout for processing)doc";

-static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 =
+static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_3 =
 R"doc(Constructor that enables a file to be passed to the operation with the
 contents of the shader. This can be either in raw format or in
 compiled SPIR-V binary format.
@ -308,7 +308,7 @@ shaderFilePath Parameter to specify the shader to load (either in
 spirv or raw format) @param komputeWorkgroup Optional parameter to
 specify the layout for processing)doc";

-static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 =
+static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_4 =
 R"doc(Constructor that enables raw shader data to be passed to the main
 operation which can be either in raw shader glsl code or in compiled
 SPIR-V binary.
@ -321,37 +321,37 @@ shaderDataRaw Optional parameter to specify the shader data either in
 binary or raw form @param komputeWorkgroup Optional parameter to
 specify the layout for processing)doc";

-static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_fetchSpirvBinaryData = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_init =
+static const char *__doc_kp_OpAlgoCreate_init =
 R"doc(The init function is responsible for the initialisation of the
 algorithm component based on the parameters specified, and allows for
 extensibility on the options provided. Further dependent classes can
 perform more specific checks such as ensuring tensors provided are
 initialised, etc.)doc";

-static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_mAlgorithm = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_mFreeAlgorithm = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc";
+static const char *__doc_kp_OpAlgoCreate_mKomputeWorkgroup = R"doc()doc";

-static const char *__doc_kp_OpAlgoBase_mShaderDataRaw =
+static const char *__doc_kp_OpAlgoCreate_mShaderDataRaw =
 R"doc(< Optional member variable which can be provided to contain either the
 raw shader content or the spirv binary content)doc";

-static const char *__doc_kp_OpAlgoBase_mShaderFilePath =
-R"doc(< Optional member variable which can be provided for the OpAlgoBase to
+static const char *__doc_kp_OpAlgoCreate_mShaderFilePath =
+R"doc(< Optional member variable which can be provided for the OpAlgoCreate to
 find the data automatically and load for processing)doc";

-static const char *__doc_kp_OpAlgoBase_postEval =
+static const char *__doc_kp_OpAlgoCreate_postEval =
 R"doc(Executes after the recorded commands are submitted, and performs a
 copy of the GPU Device memory into the staging buffer so the output
 data can be retrieved.)doc";

-static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc";
+static const char *__doc_kp_OpAlgoCreate_preEval = R"doc(Does not perform any preEval commands.)doc";

-static const char *__doc_kp_OpAlgoBase_record =
+static const char *__doc_kp_OpAlgoCreate_record =
 R"doc(This records the commands that are to be sent to the GPU. This
 includes the barriers that ensure the memory has been copied before
 going in and out of the shader, as well as the dispatch operation that
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@ -133,7 +133,7 @@ PYBIND11_MODULE(kp, m) {
        .def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
            "Records operation to sync tensor(s) from GPU memory to local memory")
        .def("record_algo_file", &kp::Sequence::record<
-                                    kp::OpAlgoBase,
+                                    kp::OpAlgoCreate,
                                    const std::string&,
                                    kp::Workgroup,
                                    kp::Constants>,
@ -148,7 +148,7 @@ PYBIND11_MODULE(kp, m) {
                py::buffer_info info(py::buffer(bytes).request());
                const char *data = reinterpret_cast<const char *>(info.ptr);
                size_t length = static_cast<size_t>(info.size);
-                return self.record<kp::OpAlgoBase>(
+                return self.record<kp::OpAlgoCreate>(
                    tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
            },
            "Records an operation using a custom shader provided as spirv bytes",
@ -211,7 +211,7 @@ PYBIND11_MODULE(kp, m) {
        .def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncLocal>,
            "Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence")
        .def("eval_algo_file_def", &kp::Manager::evalOpDefault<
-                                    kp::OpAlgoBase,
+                                    kp::OpAlgoCreate,
                                    const std::string&,
                                    kp::Workgroup,
                                    kp::Constants>,
@ -226,7 +226,7 @@ PYBIND11_MODULE(kp, m) {
                py::buffer_info info(py::buffer(bytes).request());
                const char *data = reinterpret_cast<const char *>(info.ptr);
                size_t length = static_cast<size_t>(info.size);
-                self.evalOpDefault<kp::OpAlgoBase>(
+                self.evalOpDefault<kp::OpAlgoCreate>(
                    tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
            },
            "Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence",
@ -240,7 +240,7 @@ PYBIND11_MODULE(kp, m) {
        .def("eval_tensor_sync_local", &kp::Manager::evalOp<kp::OpTensorSyncLocal>,
            "Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
        .def("eval_algo_file", &kp::Manager::evalOp<
-                                    kp::OpAlgoBase,
+                                    kp::OpAlgoCreate,
                                    const std::string&,
                                    kp::Workgroup,
                                    kp::Constants>,
@ -256,7 +256,7 @@ PYBIND11_MODULE(kp, m) {
                py::buffer_info info(py::buffer(bytes).request());
                const char *data = reinterpret_cast<const char *>(info.ptr);
                size_t length = static_cast<size_t>(info.size);
-                self.evalOp<kp::OpAlgoBase>(
+                self.evalOp<kp::OpAlgoCreate>(
                    tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
            },
            "Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence",
@ -270,7 +270,7 @@ PYBIND11_MODULE(kp, m) {
        .def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncLocal>,
            "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence")
        .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<
-                                    kp::OpAlgoBase,
+                                    kp::OpAlgoCreate,
                                    const std::string&,
                                    kp::Workgroup,
                                    kp::Constants>,
@ -285,7 +285,7 @@ PYBIND11_MODULE(kp, m) {
                py::buffer_info info(py::buffer(bytes).request());
                const char *data = reinterpret_cast<const char *>(info.ptr);
                size_t length = static_cast<size_t>(info.size);
-                self.evalOpAsyncDefault<kp::OpAlgoBase>(
+                self.evalOpAsyncDefault<kp::OpAlgoCreate>(
                    tensors, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
            },
            "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence",
@ -299,7 +299,7 @@ PYBIND11_MODULE(kp, m) {
        .def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync<kp::OpTensorSyncLocal>,
            "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
        .def("eval_async_algo_file", &kp::Manager::evalOpAsync<
-                                    kp::OpAlgoBase,
+                                    kp::OpAlgoCreate,
                                    const std::string&,
                                    kp::Workgroup,
                                    kp::Constants>,
@ -315,7 +315,7 @@ PYBIND11_MODULE(kp, m) {
                py::buffer_info info(py::buffer(bytes).request());
                const char *data = reinterpret_cast<const char *>(info.ptr);
                size_t length = static_cast<size_t>(info.size);
-                self.evalOpAsync<kp::OpAlgoBase>(
+                self.evalOpAsync<kp::OpAlgoCreate>(
                    tensors, sequenceName, std::vector<uint32_t>((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants);
            },
            "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence",
--- a/python/test/test_kompute.py
+++ b/python/test/test_kompute.py
@ -28,6 +28,424 @@ def test_opalgobase_file():
    assert tensor_out.data() == [2.0, 4.0, 6.0]


+params = [kp.Tensor([2, 2, 2]), kp.Tensor([1, 2, 3]), kp.Tensor([0, 0, 0])]
+
+mgr = kp.Manager()
+op_ct = kp.OpTensorCreate(params)
+op_ct = mgr.rebuild(op_ct)
+mgr.eval_op(op_ct)
+
+algo = kp.Algo(params, spirv)
+op_ac = kp.OpAlgoCreate(algo)
+op_ac = mgr.rebuild(op_ac)
+mgr.eval_op(op_ac)
+
+op_ac = kp.OpAlgoCreate(kp.Algo(params, spirv))
+mgr.eval_op(kp.OpAlgoCreate(algo))
+
+
+mgr = kp.Manager()
+
+op_ct = kp.OpTensorCreate(mgr, params) # This initialises operation
+op_ct.eval()
+
+algo = kp.Algo(params, spirv)
+op_ac = kp.OpAlgoCreate(mgr, algo)
+op_ct.eval()
+
+op_tsd = kp.OpTensorSyncDevice(mgr, params)
+op_ad = kp.OpAlgoDispatch(mgr, algo)
+op_tsl = kp.OpTensorSyncLocal(mgr, params)
+
+sq = kp.Sequence(mgr, "newSeq")
+sq.record([op_tsd, op_ad, op_tsl])
+sq.eval()
+sq.destroy()
+
+# Explore consistent interface:
+op_tsd = kp.OpTensorSyncDevice(sq, params)
+op_ad = kp.OpAlgoDispatch(sq, algo)
+op_tsl = kp.OpTensorSyncLocal(sq, params)
+
+op_tsd.record()
+op_ad.record()
+op_tsl.record()
+
+sq.eval()
+
+
+
+auto params = ...;
+std::string shader = "...";
+std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
+// Example passing mgr
+kp::Manager mgr;
+
+kp::OpTensorCreate op_tc(mgr, params);
+op_tc.eval()
+
+kp::Algorithm algo(params, spirv);
+kp::OpAlgoCreate op_ac(mgr, algo);
+op_ac.eval()
+
+op_ac.destroy()
+op_tc.destroy()
+
+kp::OpTensorAlgoCreate op_c(mgr, params, algo);
+op_c.eval()
+
+kp::Sequence sq(mgr);
+
+kp::OpTensorSyncDevice op_tsd(mgr, params);
+kp::OpAlgoDispatch op_ad(mgr, algo);
+kp::OpTensorSyncLocal op_tsl(mgr, params);
+
+sq.record({op_tsd, op_ad, op_tsl})
+
+for(...) {
+    sq.eval();
+
+    tensorA...
+}
+
+######
+#######
+#######
+#######
+#######
+######
+// Example not passing mgr
+kp::Manager mgr;
+
+std::shared_ptr<kp::OpTensorCreate> op_tc_1{ new kp::OpTensorCreate(params) };
+auto sq_1 = mgr.eval(op_tc_1); // Initialises and stores op as part of new sequence
+mgr.eval(op_tc_1); // Fails as this op can only be "initialised" once
+mgr.destroy(op_tc_1);
+mgr.eval(op_tc_1); // This works as it's a new setup
+mgr.eval<kp::OpTensorCreate>(params); // Fails as tensors already created
+// NOT ALLOWED TO DELETE JUST TENSORS ANYMORE - SEE BELOW
+mgr.destroy(params); // Sends to inconsistent state as op_tc_1 will still destroy these parameters
+mgr.destroy(op_tc_1, recursive=false); // Destroys only operation, which is useful when you need to ensure another operation owns the parameters
+auto op_tc_2 = mgr.eval<kp::OpTensorCreate>(params);
+std::shared_ptr<kp::OpTensorCreate> op_tc_2{ new kp::OpTensorCreate(params) }; // fails as tensors already created
+op_tc_2.destroy(); // Manager still holds dangling reference so requires explicit termination in manager
+mgr.destroy(op_tc_2);
+auto op_tc_3 = mgr.eval({ new kp::OpTensorCreate(params) });
+
+std::shared_ptr<kp::Algorithm> algo{ new kp::Algorithm(params, spirv, kp::Workgroup(), kp::SpecConst(), kp::PushConst()) };
+std::shared_ptr<kp::OpAlgoCreate> op_ac_1{ new kp::OpAlgoCreate(algo) };
+mgr.eval(op_ac_1); // Initialises and stores op as part of manager
+mgr.eval(op_ac_1); // Fails as this op can only be "initialised" once
+mgr.destroy(op_ac_1);
+
+std::shared_ptr<kp::OpAlgoCreate> op_ac_2 =
+    mgr.eval({ new kp::OpAlgoCreate(params, { new kp::Algorithm(spirv) }) });
+
+std::shared_ptr<kp::OpAlgoMultCreate> op_amc{ new kp::OpAlgoMultCreate(params) };
+mgr.eval(op_amc);
+
+std::shared_ptr<kp::Algorithm> algo_mult = op_amc.algorithm()
+std::vector<std::shared_ptr<kp::Tensor>> params = op_amc.tensors()
+
+auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
+auto op_ad = std::make_shared<kp::OpAlgoSetPushConst>(algo);
+auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
+auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
+
+op_params = {op_tsd, op_ad, op_tsl};
+
+mgr.record(op_params);
+mgr.eval(); // Runs recorded default sequence
+
+mgr.record(op_params, clear=false); // Non-create ops ok if rerun
+mgr.eval(); // Runs the recorded params twice
+
+mgr.record("namedSeq", op_params);
+mgr.eval("namedSeq");
+
+kp::Manager mgrAsync(0, {0, 2});
+mgr.sequence("namedSeq2", 0); // Create named sequence with queue in index 0
+mgr.sequence("namedSeq3", 1);
+
+mgr.eval_async("namedSeq2", op_params); // Clear, record params and eval
+mgr.eval_async("namedSeq3", op_params); // Clear, record params and eval
+
+mgr.eval_await("namedSeq2");
+mgr.eval_await("namedSeq3");
+
+mgr.destroy("namedSeq"); // Destroy named sequence
+mgr.destroy({"namedSeq2", "namedSeq3"}); // Destroy multiple named sequences
+mgr.destroy("namedSeq"); // Error
+
+
+
+
+mgr = kp.Manager(0, [0, 2])
+
+// Manager does not need to manage seq anymore
+sq_1 = kp.Sequence(mgr, 0)
+
+t1 = kp.Tensor(sq_1, [0, 0, 0])
+t2 = kp.Tensor(sq_1, [0, 1, 2])
+
+algo = kp.Algorithm(sq_1)
+
+op_tc = kp.OpTensorCreate(sq_1, params)
+op_tsd = kp.OpTensorSyncDevice(sq_1, params)
+op_ac = kp.OpAlgoCreate(sq_1, algo)
+op_ad = kp.OpAlgoDispatch(sq_1, algo)
+
+sq_1.clear()
+
+op_tc.record()
+op_tsd.record()
+op_ac.record()
+op_ad.record()
+op_ad.record()
+op_ad.record()
+
+sq_1.eval()
+
+
+std::shared_ptr<kp::Manager> mgr = kp::ManagerSP(0, {0, 1});
+
+std::shared_ptr<kp::Sequence> sq_2 = kp::SequenceSP(mgr, 1)
+
+std::shared_ptr<kp::Tensor> t1 = kp::TensorSP(sq_2, {1, 2, 3});
+std::shared_ptr<kp::Tensor> t2 = kp::TensorSP(sq_2, {2, 3, 4});
+
+auto params = ...
+
+std::shared_ptr<kp::Algorithm> algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
+
+// How do we deal with this?
+{
+    auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
+    auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
+}
+
+sq_2.eval()
+
+
+// HEAP ONLY - This would fail
+
+kp::Manager mgr = kp::Manager(0, {0, 1});
+
+kp::Sequence sq_2 = kp::Sequence(mgr, 1)
+
+kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
+kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
+
+auto params = ...
+
+kp::Algorithm algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup);
+
+// How do we deal with this?
+{
+    auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
+    auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
+}
+
+sq_2.eval()
+
+
+
+
+
+kp::Manager mgr = kp::Manager(0, {0, 1});
+
+kp::Sequence sq_2 = kp::Sequence(mgr, 1)
+
+kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3});
+kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4});
+
+auto params = ...
+
+kp::Algorithm* algo2 = new kp::Algorithm(sq_2, params, spirv, workgroup);
+
+// How do we deal with this?
+{
+    auto op_1 = kp::OpTensorSyncDevice(sq_2, params)
+    auto op_2 = kp::OpAlgoDispatch(sq_2, algo)
+}
+
+sq_2.eval()
+
+
+
+
+
+
+kp::Manager mgr = kp::Manager;
+
+auto sq_2 = mgr.sequence()
+
+{
+    // What if we want to use tensor in a different sequence?
+    auto t1 = sq_2.tensor({1, 2, 3});
+    auto t2 = sq_2.tensor({1, 2, 3});
+
+    auto algo2 = sq_2.algorithm();
+
+    sq_2.record(kp::OpTensorRebuild({ t1 }))
+    sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
+    sq_2.record(kp::OpTensorSyncDevice(prams))
+    sq_2.record(kp::OpAlgoDispatch(prams, algo2))
+}
+
+sq_2.eval()
+
+
+
+kp::Manager mgr = kp::Manager;
+
+auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared
+auto t2 = mgr.tensor({1, 2, 3});
+
+auto algo2 = mgr.algorithm();
+
+{
+    auto sq_2 = mgr.sequence()
+
+    {
+        sq_2.record(kp::OpTensorRebuild({ t1 })) // record only supports move operator &&
+        sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv))
+        sq_2.record(kp::OpTensorSyncDevice(prams))
+        sq_2.record(kp::OpAlgoDispatch(prams, algo2))
+    }
+
+    sq_2.eval()
+}
+
+
+
+// What about only tensors being init with it
+
+
+{
+    kp::Manager mgr = kp::Manager;
+
+    auto t0 = mgr.tensor({0, 0, 0})
+
+    {
+        auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
+
+        {
+            auto sq_2 = mgr.sequence()
+
+            {
+
+                auto t2 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1)
+                auto algo2 = mgr.algorithm(); // Held as weak ptr but passed as shared (refc 1)
+
+                params = {t1, t2}
+
+                sq_2.record(kp::OpTensorRebuild(params, {1, 2, 3, 4})) // Refc is now 2 for 3 for params
+                sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) // refc is now 2 for algo2, 3 for params
+                sq_2.record(kp::OpTensorSyncDevice(prams)) // refc for params 4
+                sq_2.record(kp::OpAlgoDispatch(prams, algo2)) // refc for params 5, 3 for algo2
+            }
+
+            sq_2.eval() // all refcs still valid
+        } // seq destroyed so refc for algo2 and t2 drops to 0, gets destroyed, t1 has 1
+    } // t1 refc drops to 0, gets destroyed
+    // refc of t0 is still 1
+
+    mgr.gc() // Iterates through all tensor, sequence and algo weak_ptr and removes unused
+
+    // can we have something like
+    mgr.sequence()
+        .record(kp::OpTensorRebuild(params, {1, 2, 3, 4}))
+        .record(kp::OpAlgoDispatch(params, algo2))
+        .eval();
+
+}// refc is destroyed by manager manually, the rest are empty shells so ignored
+
+
+
+
+kp::Manager mgr = kp::Manager(0, {0, 1});
+
+std::shared_ptr<kp::Tensor> t1 = mgr.tensor({1, 2, 3});
+std::shared_ptr<kp::Tensor> t2 = mgr.tensor({1, 2, 3});
+
+auto params = ...
+
+std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm(params, spirv, workgroup);
+
+sq_2.record<kp::OpTensorSyncDevice>(prams)
+sq_2.record<kp::OpAlgoDispatch>(algo)
+
+
+// WHY TENSORS CAN NO LONGER BE DESTROYED DIRECTLY:
+
+     * std::shared_ptr<kp::OpTensorCreate> op_tc1{ kp::OpTensorCreate(params) };
+     * {
+     *     std::shared_ptr<kp::OpTensorCreate> op_tc2{ kp::OpTensorCreate(params) };
+     *     mgr.eval(op_tc2);
+     *     mgr.destroy(params);
+     *
+     *     mgr.eval(op_tc1);
+     *
+     * } // op_tc1 is destroyed and all parameters are freed
+
+
+
+// NO LONGER ALLOWED: Mainly as manager now needs to register ops
+// If we still want it, then sequence will have to hold ref to manager
+auto sq = mgr.sequence();
+
+auto op_tsd = std::make_shared<kp::OpTensorSyncDevice>(params);
+auto op_ad = std::make_shared<kp::OpAlgoDispatch>(algo);
+auto op_tsl = std::make_shared<kp::OpTensorSyncLocal>(mgr, params);
+
+sq.record({op_tsd, op_ad, op_tsl}); // Clear and record
+sq.eval();
+sq.record({op_tsd, op_ad, op_tsl}, clear=false); // record on top
+sq.eval();
+sq.clear(); // explicitly clear
+
+
+
+
+
+mgr = kp.Manager()
+
+op_ct = kp.OpTensorCreate(params)
+mgr.eval(op_ct)
+
+algo = kp.Algo(params, spirv)
+op_ac = kp.OpAlgoCreate(algo)
+mgr.eval(op_ac) # Runs init on operator function (below shows explicit steps)
+
+op_tsd = kp.OpTensorSyncDevice(params)
+op_ad = kp.OpAlgoDispatch(algo)
+op_tsl = kp.OpTensorSyncLocal(params)
+
+sq = mgr.sequence()
+sq.record([op_tsd, op_ad, op_tsl])
+sq.eval()
+sq.eval()
+sq.eval()
+
+mgr.eval(op_ac) # Would fail as algo is initialised
+mgr.destroy(op_ac) # Destroys Op and Algo owned object
+mgr.eval(op_ac) # Succeeds with new
+mgr.destroy(op_ac)
+mgr.init(op_ac)
+mgr.eval(op_ac, init=False)
+
+
+
+
+
+
+
+
+
+
+
 def test_shader_str():
    """
    Test basic OpAlgoBase operation