From 9aae5d69dbf7ebfb09d0069743667497d2779cfa Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Wed, 24 Feb 2021 08:39:09 +0000 Subject: [PATCH] Initial checkpoint with reasonable workflow --- .ccls | 1 + .../app/src/main/cpp/KomputeModelML.cpp | 2 +- examples/array_multiplication/src/Main.cpp | 2 +- .../kompute_summator/KomputeSummatorNode.cpp | 2 +- .../gdnative_shared/src/KomputeSummator.cpp | 2 +- .../kompute_model_ml/KomputeModelMLNode.cpp | 4 +- .../gdnative_shared/src/KomputeModelML.cpp | 4 +- examples/logistic_regression/src/Main.cpp | 2 +- python/src/docstrings.hpp | 40 +- python/src/main.cpp | 20 +- python/test/test_kompute.py | 418 ++++++++++++++++++ single_include/AggregateHeaders.cpp | 2 +- single_include/kompute/Kompute.hpp | 22 +- src/Algorithm.cpp | 123 ++++-- src/Manager.cpp | 84 ++-- src/OpAlgoBase.cpp | 176 -------- src/OpAlgoCreate.cpp | 51 +++ src/OpAlgoDispatch.cpp | 59 +++ src/OpAlgoLhsRhsOut.cpp | 16 +- src/OpTensorCopy.cpp | 22 +- src/OpTensorCreate.cpp | 46 ++ src/OpTensorSyncDevice.cpp | 19 +- src/OpTensorSyncLocal.cpp | 26 +- src/Sequence.cpp | 43 +- src/Tensor.cpp | 72 +-- src/include/kompute/Algorithm.hpp | 42 +- src/include/kompute/Manager.hpp | 19 +- src/include/kompute/Sequence.hpp | 19 - src/include/kompute/Tensor.hpp | 35 +- .../kompute/operations/OpAlgoCreate.hpp | 77 ++++ .../{OpAlgoBase.hpp => OpAlgoDispatch.hpp} | 63 +-- .../kompute/operations/OpAlgoLhsRhsOut.hpp | 20 +- src/include/kompute/operations/OpBase.hpp | 122 +++-- src/include/kompute/operations/OpMult.hpp | 6 +- .../kompute/operations/OpTensorCopy.hpp | 12 +- .../kompute/operations/OpTensorCreate.hpp | 71 +++ .../kompute/operations/OpTensorSyncDevice.hpp | 15 +- .../kompute/operations/OpTensorSyncLocal.hpp | 12 +- test/TestAsyncOperations.cpp | 8 +- test/TestDestroy.cpp | 26 +- test/TestLogisticRegression.cpp | 4 +- test/TestMultipleAlgoExecutions.cpp | 26 +- test/TestOpAlgoLoopsPassingData.cpp | 2 +- test/TestOpShadersFromStringAndFile.cpp | 12 +- 
test/TestSpecializationConstant.cpp | 2 +- test/TestWorkgroup.cpp | 2 +- 46 files changed, 1158 insertions(+), 695 deletions(-) delete mode 100644 src/OpAlgoBase.cpp create mode 100644 src/OpAlgoCreate.cpp create mode 100644 src/OpAlgoDispatch.cpp create mode 100644 src/OpTensorCreate.cpp create mode 100644 src/include/kompute/operations/OpAlgoCreate.hpp rename src/include/kompute/operations/{OpAlgoBase.hpp => OpAlgoDispatch.hpp} (64%) create mode 100644 src/include/kompute/operations/OpTensorCreate.hpp diff --git a/.ccls b/.ccls index 460b00b9d..da06a9763 100644 --- a/.ccls +++ b/.ccls @@ -19,6 +19,7 @@ -I./external/googletest/googletest/include/ -I./external/glslang/ -I./external/spdlog/include/ +-I./external/fmt/include/ -I./src/include/ -I./single_include/ -I./vk_ndk_wrapper_include/ diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index e39657022..f1884760a 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -52,7 +52,7 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, sq->record({ wIn, bIn }); // Newer versions of Android are able to use shaderc to read raw string - sq->record( + sq->record( params, kp::Shader::compile_source(LR_SHADER)); sq->record({ wOutI, wOutJ, bOut, lOut }); diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 0fb704a90..8ec611e15 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -37,7 +37,7 @@ int main() } )"); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, kp::Shader::compile_source(shader)); diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 
c0b68595b..2e9f1bc00 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record( + sq->record( { this->mPrimaryTensor, this->mSecondaryTensor }, kp::Shader::compile_source(shader)); diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index feb674cd3..ece095c8e 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -56,7 +56,7 @@ void KomputeSummator::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - this->mSequence->record( + this->mSequence->record( { this->mPrimaryTensor, this->mSecondaryTensor }, kp::Shader::compile_source(shader)); diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index c304deffd..57490a8d4 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -62,11 +62,11 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING // Newer versions of Android are able to use shaderc to read raw string - sq->record( + sq->record( params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); #else // Older versions of Android require the SPIRV binary directly - sq->record( + sq->record( params, std::vector( kp::shader_data::shaders_glsl_logisticregression_comp_spv, kp::shader_data::shaders_glsl_logisticregression_comp_spv diff --git 
a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index f16c3c24b..1a01febd0 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -66,11 +66,11 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING // Newer versions of Android are able to use shaderc to read raw string - sq->record( + sq->record( params, std::vector(LR_SHADER.begin(), LR_SHADER.end())); #else // Older versions of Android require the SPIRV binary directly - sq->record( + sq->record( params, std::vector( kp::shader_data::shaders_glsl_logisticregression_comp_spv, kp::shader_data::shaders_glsl_logisticregression_comp_spv diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index e342e7a2b..769699ca7 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -44,7 +44,7 @@ int main() sq->record({ wIn, bIn }); - sq->record( + sq->record( params, std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 36f481b94..2000421c3 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -266,23 +266,23 @@ The type of tensor to initialize @param syncDataToGPU Whether to sync the data to GPU memory @returns Initialized Tensor with memory Syncd to GPU device)doc"; -static const char *__doc_kp_OpAlgoBase = +static const char *__doc_kp_OpAlgoCreate = R"doc(Operation that provides a general abstraction that simplifies the use of algorithm and parameter components which can be used with shaders. 
By default it enables the user to provide a dynamic number of tensors which are then passed as inputs.)doc"; -static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_x = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_y = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_KomputeWorkgroup_z = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; +static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; -static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 = +static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_2 = R"doc(Default constructor with parameters that provides the bare minimum requirements for the operations to be able to create and manage their sub-components. @@ -295,7 +295,7 @@ shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) @param komputeWorkgroup Optional parameter to specify the layout for processing)doc"; -static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 = +static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_3 = R"doc(Constructor that enables a file to be passed to the operation with the contents of the shader. This can be either in raw format or in compiled SPIR-V binary format. 
@@ -308,7 +308,7 @@ shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) @param komputeWorkgroup Optional parameter to specify the layout for processing)doc"; -static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 = +static const char *__doc_kp_OpAlgoCreate_OpAlgoCreate_4 = R"doc(Constructor that enables raw shader data to be passed to the main operation which can be either in raw shader glsl code or in compiled SPIR-V binary. @@ -321,37 +321,37 @@ shaderDataRaw Optional parameter to specify the shader data either in binary or raw form @param komputeWorkgroup Optional parameter to specify the layout for processing)doc"; -static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_fetchSpirvBinaryData = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_init = +static const char *__doc_kp_OpAlgoCreate_init = R"doc(The init function is responsible for the initialisation of the algorithm component based on the parameters specified, and allows for extensibility on the options provided. 
Further dependent classes can perform more specific checks such as ensuring tensors provided are initialised, etc.)doc"; -static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_mAlgorithm = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_mFreeAlgorithm = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc"; +static const char *__doc_kp_OpAlgoCreate_mKomputeWorkgroup = R"doc()doc"; -static const char *__doc_kp_OpAlgoBase_mShaderDataRaw = +static const char *__doc_kp_OpAlgoCreate_mShaderDataRaw = R"doc(< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content)doc"; -static const char *__doc_kp_OpAlgoBase_mShaderFilePath = -R"doc(< Optional member variable which can be provided for the OpAlgoBase to +static const char *__doc_kp_OpAlgoCreate_mShaderFilePath = +R"doc(< Optional member variable which can be provided for the OpAlgoCreate to find the data automatically and load for processing)doc"; -static const char *__doc_kp_OpAlgoBase_postEval = +static const char *__doc_kp_OpAlgoCreate_postEval = R"doc(Executes after the recorded commands are submitted, and performs a copy of the GPU Device memory into the staging buffer so the output data can be retrieved.)doc"; -static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc"; +static const char *__doc_kp_OpAlgoCreate_preEval = R"doc(Does not perform any preEval commands.)doc"; -static const char *__doc_kp_OpAlgoBase_record = +static const char *__doc_kp_OpAlgoCreate_record = R"doc(This records the commands that are to be sent to the GPU. 
This includes the barriers that ensure the memory has been copied before going in and out of the shader, as well as the dispatch operation that diff --git a/python/src/main.cpp b/python/src/main.cpp index ab7d64e41..6f99a193b 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -133,7 +133,7 @@ PYBIND11_MODULE(kp, m) { .def("record_tensor_sync_local", &kp::Sequence::record, "Records operation to sync tensor(s) from GPU memory to local memory") .def("record_algo_file", &kp::Sequence::record< - kp::OpAlgoBase, + kp::OpAlgoCreate, const std::string&, kp::Workgroup, kp::Constants>, @@ -148,7 +148,7 @@ PYBIND11_MODULE(kp, m) { py::buffer_info info(py::buffer(bytes).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); - return self.record( + return self.record( tensors, std::vector((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants); }, "Records an operation using a custom shader provided as spirv bytes", @@ -211,7 +211,7 @@ PYBIND11_MODULE(kp, m) { .def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault, "Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence") .def("eval_algo_file_def", &kp::Manager::evalOpDefault< - kp::OpAlgoBase, + kp::OpAlgoCreate, const std::string&, kp::Workgroup, kp::Constants>, @@ -226,7 +226,7 @@ PYBIND11_MODULE(kp, m) { py::buffer_info info(py::buffer(bytes).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); - self.evalOpDefault( + self.evalOpDefault( tensors, std::vector((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants); }, "Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence", @@ -240,7 +240,7 @@ PYBIND11_MODULE(kp, m) { .def("eval_tensor_sync_local", &kp::Manager::evalOp, "Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence") .def("eval_algo_file", 
&kp::Manager::evalOp< - kp::OpAlgoBase, + kp::OpAlgoCreate, const std::string&, kp::Workgroup, kp::Constants>, @@ -256,7 +256,7 @@ PYBIND11_MODULE(kp, m) { py::buffer_info info(py::buffer(bytes).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); - self.evalOp( + self.evalOp( tensors, sequenceName, std::vector((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants); }, "Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence", @@ -270,7 +270,7 @@ PYBIND11_MODULE(kp, m) { .def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence") .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault< - kp::OpAlgoBase, + kp::OpAlgoCreate, const std::string&, kp::Workgroup, kp::Constants>, @@ -285,7 +285,7 @@ PYBIND11_MODULE(kp, m) { py::buffer_info info(py::buffer(bytes).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); - self.evalOpAsyncDefault( + self.evalOpAsyncDefault( tensors, std::vector((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants); }, "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence", @@ -299,7 +299,7 @@ PYBIND11_MODULE(kp, m) { .def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence") .def("eval_async_algo_file", &kp::Manager::evalOpAsync< - kp::OpAlgoBase, + kp::OpAlgoCreate, const std::string&, kp::Workgroup, kp::Constants>, @@ -315,7 +315,7 @@ PYBIND11_MODULE(kp, m) { py::buffer_info info(py::buffer(bytes).request()); const char *data = reinterpret_cast(info.ptr); size_t length = static_cast(info.size); - self.evalOpAsync( + self.evalOpAsync( tensors, sequenceName, 
std::vector((uint32_t*)data, (uint32_t*)(data + length)), workgroup, constants); }, "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence", diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index b998532b8..a6e4ead3a 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -28,6 +28,424 @@ def test_opalgobase_file(): assert tensor_out.data() == [2.0, 4.0, 6.0] +params = [kp.Tensor([2, 2, 2]), kp.Tensor([1, 2, 3]), kp.Tensor([0, 0, 0])] + +mgr = kp.Manager() +op_ct = kp.OpTensorCreate(params) +op_ct = mgr.rebuild(op_ct) +mgr.eval_op(op_ct) + +algo = kp.Algo(params, spirv) +op_ac = kp.OpAlgoCreate(algo) +op_ac = mgr.rebuild(op_ac) +mgr.eval_op(op_ac) + +op_ac = kp.OpAlgoCreate(kp.Algo(params, spirv)) +mgr.eval_op(kp.OpAlgoCreate(algo)) + + +mgr = kp.Manager() + +op_ct = kp.OpTensorCreate(mgr, params) # This initialises operation +op_ct.eval() + +algo = kp.Algo(params, spirv) +op_ac = kp.OpAlgoCreate(mgr, algo) +op_ct.eval() + +op_tsd = kp.OpTensorSyncDevice(mgr, params) +op_ad = kp.OpAlgoDispatch(mgr, algo) +op_tsl = kp.OpTensorSyncLocal(mgr, params) + +sq = kp.Sequence(mgr, "newSeq") +sq.record([op_tsd, op_ad, op_tsl]) +sq.eval() +sq.destroy() + +# Explore consistent interface: +op_tsd = kp.OpTensorSyncDevice(sq, params) +op_ad = kp.OpAlgoDispatch(sq, algo) +op_tsl = kp.OpTensorSyncLocal(sq, params) + +op_tsd.record() +op_ad.record() +op_tsl.record() + +sq.eval() + + + +auto params = ...; +std::string shader = "..."; +std::vector spirv = kp::Shader::compile_source(shader); + +// Example passing mgr +kp::Manager mgr; + +kp::OpTensorCreate op_tc(mgr, params); +op_tc.eval() + +kp::Algorithm algo(params, spirv); +kp::OpAlgoCreate op_ac(mgr, algo); +op_ac.eval() + +op_ac.destroy() +op_tc.destroy() + +kp::OpTensorAlgoCreate op_c(mgr, params, algo); +op_c.eval() + +kp::Sequence sq(mgr); + +kp::OpTensorSyncDevice op_tsd(mgr, params); +kp::OpAlgoDispatch 
op_ad(mgr, algo); +kp::OpTensorSyncLocal op_tsl(mgr, params); + +sq.record({op_tsd, op_ad, op_tsl}) + +for(...) { + sq.eval(); + + tensorA... +} + +###### +####### +####### +####### +####### +###### +// Example not passing mgr +kp::Manager mgr; + +std::shared_ptr op_tc_1{ new kp::OpTensorCreate(params) }; +auto sq_1 = mgr.eval(op_tc_1); // Initialises and stores op as part of new sequence +mgr.eval(op_tc_1); // Fails as this op can only be "initialised" once +mgr.destroy(op_tc_1); +mgr.eval(op_tc_1); // This works as it's a new setup +mgr.eval(params); // Fails as tensors already created +// NOT ALLOWED TO DELETE JUST TENSORS ANYMORE - SEE BELOW +mgr.destroy(params); // Sends to inconsistent state as op_tc_1 will still destroy these parameters +mgr.destroy(op_tc_1, recursive=false); // Destroys only operation, which is useful when you need to ensure another operation owns the parameters +auto op_tc_2 = mgr.eval(params); +std::shared_ptr op_tc_2{ new kp::OpTensorCreate(params) }; // fails as tensors already created +op_tc_2.destroy(); // Manager still holds dangling reference so requires explicit termination in manager +mgr.destroy(op_tc_2); +auto op_tc_3 = mgr.eval({ new kp::OpTensorCreate(params) }); + +std::shared_ptr algo{ new kp::Algorithm(params, spirv, kp::Workgroup(), kp::SpecConst(), kp::PushConst()) }; +std::shared_ptr op_ac_1{ new kp::OpAlgoCreate(algo) }; +mgr.eval(op_ac_1); // Initialises and stores op as part of manager +mgr.eval(op_ac_1); // Fails as this op can only be "initialised" once +mgr.destroy(op_ac_1); + +std::shared_ptr op_ac_2 = + mgr.eval({ new kp::OpAlgoCreate(params, { new kp::Algorithm(spirv) }) }); + +std::shared_ptr op_amc{ new kp::OpAlgoMultCreate(params) }; +mgr.eval(op_amc); + +std::shared_ptr algo_mult = op_amc.algorithm() +std::vector> params = op_amc.tensors() + +auto op_tsd = std::make_shared(params); +auto op_ad = std::make_shared(algo); +auto op_ad = std::make_shared(algo); +auto op_tsl = std::make_shared(mgr, params); + 
+op_params = {op_tsd, op_ad, op_tsl}; + +mgr.record(op_params); +mgr.eval(); // Runs recorded default sequence + +mgr.record(op_params, clear=false); // Non-create ops ok if rerun +mgr.eval(); // Runs twice the recorded params + +mgr.record("namedSeq", op_params); +mgr.eval("namedSeq"); + +kp::Manager mgrAsync(0, {0, 2}); +mgr.sequence("namedSeq2", 0); // Create named sequence with queue in index 0 +mgr.sequence("namedSeq3", 1); + +mgr.eval_async("namedSeq2", op_params); // Clear, record params and eval +mgr.eval_async("namedSeq3", op_params); // Clear, record params and eval + +mgr.eval_await("namedSeq2"); +mgr.eval_await("namedSeq3"); + +mgr.destroy("namedSeq"); // Destroy named sequence +mgr.destroy({"namedSeq2", "namedSeq3"}); // Destroy multiple named sequences +mgr.destroy("namedSeq"); // Error + + + + +mgr = kp.Manager(0, [0, 2]) + +// Manager does not need to manage seq anymore +sq_1 = kp.Sequence(mgr, 0) + +t1 = kp.Tensor(sq_1, [0, 0, 0]) +t2 = kp.Tensor(sq_1, [0, 1, 2]) + +algo = kp.Algorithm(sq_1) + +op_tc = kp.OpTensorCreate(sq_1, params) +op_tsd = kp.OpTensorSyncDevice(sq_1, params) +op_ac = kp.OpAlgoCreate(sq_1, algo) +op_ad = kp.OpAlgoDispatch(sq_1, algo) + +sq_1.clear() + +op_tc.record() +op_tsd.record() +op_ac.record() +op_ad.record() +op_ad.record() +op_ad.record() + +sq_1.eval() + + +std::shared_ptr mgr = kp::ManagerSP(0, {0, 1}); + +std::shared_ptr sq_2 = kp::SequenceSP(mgr, 1) + +std::shared_ptr t1 = kp::TensorSP(sq_2, {1, 2, 3}); +std::shared_ptr t2 = kp::TensorSP(sq_2, {2, 3, 4}); + +auto params = ... + +std::shared_ptr algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup); + +// How do we deal with this? 
+{ + auto op_1 = kp::OpTensorSyncDevice(sq_2, params) + auto op_2 = kp::OpAlgoDispatch(sq_2, algo) +} + +sq_2.eval() + + +// HEAP ONLY - This would fail + +kp::Manager mgr = kp::Manager(0, {0, 1}); + +kp::Sequence sq_2 = kp::Sequence(mgr, 1) + +kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3}); +kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4}); + +auto params = ... + +kp::Algorithm algo2 = kp::AlgorithmSP(sq_2, params, spirv, workgroup); + +// How do we deal with this? +{ + auto op_1 = kp::OpTensorSyncDevice(sq_2, params) + auto op_2 = kp::OpAlgoDispatch(sq_2, algo) +} + +sq_2.eval() + + + + + +kp::Manager mgr = kp::Manager(0, {0, 1}); + +kp::Sequence sq_2 = kp::Sequence(mgr, 1) + +kp::Tensor t1 = kp::Tensor(sq_2, {1, 2, 3}); +kp::Tensor t2 = kp::Tensor(sq_2, {2, 3, 4}); + +auto params = ... + +kp::Algorithm* algo2 = new kp::Algorithm(sq_2, params, spirv, workgroup); + +// How do we deal with this? +{ + auto op_1 = kp::OpTensorSyncDevice(sq_2, params) + auto op_2 = kp::OpAlgoDispatch(sq_2, algo) +} + +sq_2.eval() + + + + + + +kp::Manager mgr = kp::Manager; + +auto sq_2 = mgr.sequence() + +{ + // What if we want to use tensor in a different sequence? 
+ auto t1 = sq_2.tensor({1, 2, 3}); + auto t2 = sq_2.tensor({1, 2, 3}); + + auto algo2 = sq_2.algorithm(); + + sq_2.record(kp::OpTensorRebuild({ t1 })) + sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) + sq_2.record(kp::OpTensorSyncDevice(prams)) + sq_2.record(kp::OpAlgoDispatch(prams, algo2)) +} + +sq_2.eval() + + + +kp::Manager mgr = kp::Manager; + +auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared +auto t2 = mgr.tensor({1, 2, 3}); + +auto algo2 = mgr.algorithm(); + +{ + auto sq_2 = mgr.sequence() + + { + sq_2.record(kp::OpTensorRebuild({ t1 })) // record only supports move operator && + sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) + sq_2.record(kp::OpTensorSyncDevice(prams)) + sq_2.record(kp::OpAlgoDispatch(prams, algo2)) + } + + sq_2.eval() +} + + + +// What about only tensors being init with it + + +{ + kp::Manager mgr = kp::Manager; + + auto t0 = mgr.tensor({0, 0, 0}) + + { + auto t1 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1) + + { + auto sq_2 = mgr.sequence() + + { + + auto t2 = mgr.tensor({1, 2, 3}); // Held as weak ptr but passed as shared (refc 1) + auto algo2 = mgr.algorithm(); // Held as weak ptr but passed as shared (refc 1) + + params = {t1, t2} + + sq_2.record(kp::OpTensorRebuild(params, {1, 2, 3, 4})) // Refc is now 2 for 3 for params + sq_2.record(kp::OpAlgoRebuild(params, algo2, spirv)) // refc is now 2 for algo2, 3 for params + sq_2.record(kp::OpTensorSyncDevice(prams)) // refc for params 4 + sq_2.record(kp::OpAlgoDispatch(prams, algo2)) // refc for params 5, 3 for algo2 + } + + sq_2.eval() // all refcs still valid + } // seq destroyed so refc for algo2 and t2 drops to 0, gets destroyed, t1 has 1 + } // t1 refc drops to 0, gets destroyed + // refc of t0 is still 1 + + mgr.gc() // Iterates through all tensor, sequence and algo weak_ptr and removes unused + + // can we have something like + mgr.sequence() + .record(kp::OpTensorRebuild(params, {1, 2, 3, 4})) + 
.record(kp::OpAlgoDispatch(params, algo2)) + .eval(); + +}// refc is destroyed by manager manually, the rest are empty shells so ignored + + + + +kp::Manager mgr = kp::Manager(0, {0, 1}); + +std::shared_ptr t1 = mgr.tensor({1, 2, 3}); +std::shared_ptr t2 = mgr.tensor({1, 2, 3}); + +auto params = ... + +std::shared_ptr algo2 = mgr.algorithm(params, spirv, workgroup); + +sq_2.record(prams) +sq_2.record(algo) + + +// WHY NO MORE DESTROY TENSORS: + + * std::shared_ptr op_tc1{ kp::OpTensorCreate(params) }; + * { + * std::shared_ptr op_tc2{ kp::OpTensorCreate(params) }; + * mgr.eval(op_tc2); + * mgr.destroy(params); + * + * mgr.eval(op_tc1); + * + * } // op_tc1 is destroyed and all parameters are freed + + + +// NO LONGER ALLOWED: Mainly as manager now needs to register ops +// If we still want it, then sequence will have to hold ref to manager +auto sq = mgr.sequence(); + +auto op_tsd = std::make_shared(params); +auto op_ad = std::make_shared(algo); +auto op_tsl = std::make_shared(mgr, params); + +sq.record({op_tsd, op_ad, op_tsl}); // Clear and record +sq.eval(); +sq.record({op_tsd, op_ad, op_tsl}, clear=false); // record on top +sq.eval(); +sq.clear(); // explicitly clear + + + + + +mgr = kp.Manager() + +op_ct = kp.OpTensorCreate(params) +mgr.eval(op_ct) + +algo = kp.Algo(params, spirv) +op_ac = kp.OpAlgoCreate(algo) +mgr.eval(op_ac) # Runs init on operator function (below shows explicit steps) + +op_tsd = kp.OpTensorSyncDevice(params) +op_ad = kp.OpAlgoDispatch(algo) +op_tsl = kp.OpTensorSyncLocal(params) + +sq = mgr.sequence() +sq.record([op_tsd, op_ad, op_tsl]) +sq.eval() +sq.eval() +sq.eval() + +mgr.eval(op_ac) # Would fail as algo is initialised +mgr.destroy(op_ac) # Destroys Op and Algo owned object +mgr.eval(op_ac) # Succeeds with new +mgr.destroy(op_ac) +mgr.init(op_ac) +mgr.eval(op_ac, init=False) + + + + + + + + + + + def test_shader_str(): """ Test basic OpAlgoBase operation diff --git a/single_include/AggregateHeaders.cpp 
b/single_include/AggregateHeaders.cpp index 725d04f51..599607c43 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -6,7 +6,7 @@ #include "kompute/Manager.hpp" #include "kompute/Sequence.hpp" #include "kompute/operations/OpBase.hpp" -#include "kompute/operations/OpAlgoBase.hpp" +#include "kompute/operations/OpAlgoCreate.hpp" #include "kompute/operations/OpAlgoLhsRhsOut.hpp" #include "kompute/operations/OpMult.hpp" #include "kompute/operations/OpTensorCopy.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index c77f72d3b..b366dbf09 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1893,14 +1893,14 @@ namespace kp { * By default it enables the user to provide a dynamic number of tensors * which are then passed as inputs. */ -class OpAlgoBase : public OpBase +class OpAlgoCreate : public OpBase { public: /** * Base constructor, should not be used unless explicitly intended. 
*/ - OpAlgoBase(); + OpAlgoCreate(); /** * Default constructor with parameters that provides the bare minimum @@ -1914,7 +1914,7 @@ class OpAlgoBase : public OpBase * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, + OpAlgoCreate(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, @@ -1933,7 +1933,7 @@ class OpAlgoBase : public OpBase * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, + OpAlgoCreate(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, @@ -1952,7 +1952,7 @@ class OpAlgoBase : public OpBase * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, + OpAlgoCreate(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors, @@ -1964,7 +1964,7 @@ class OpAlgoBase : public OpBase * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - virtual ~OpAlgoBase() override; + virtual ~OpAlgoCreate() override; /** * The init function is responsible for the initialisation of the algorithm @@ -2005,9 +2005,9 @@ class OpAlgoBase : public OpBase // -------------- ALWAYS OWNED RESOURCES - Workgroup mKomputeWorkgroup; + Workgroup mWorkgroup; - std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing + std::string 
mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoCreate to find the data automatically and load for processing std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content virtual std::vector fetchSpirvBinaryData(); @@ -2024,7 +2024,7 @@ namespace kp { * right hand and left hand side datapoints together with a single output. * The expected data passed is two input tensors and one output tensor. */ -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoCreate { public: /** @@ -2102,7 +2102,7 @@ namespace kp { * Operation that performs multiplication on two tensors and outpus on third * tensor. */ -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoCreate { public: /** @@ -2128,7 +2128,7 @@ class OpMult : public OpAlgoBase std::shared_ptr commandBuffer, std::vector> tensors, const Workgroup& komputeWorkgroup = {}) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) + : OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 3217ee99a..512e0e5aa 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -4,36 +4,62 @@ namespace kp { -Algorithm::Algorithm() -{ - KP_LOG_DEBUG("Kompute Algorithm base constructor"); -} - -Algorithm::Algorithm(std::shared_ptr device, - std::shared_ptr commandBuffer, - const Constants& specializationConstants) +Algorithm::Algorithm( + std::shared_ptr device, + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants, + const Constants& pushConstants) { KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; - this->mCommandBuffer = commandBuffer; - this->mSpecializationConstants = specializationConstants; + 
this->setWorkgroup(workgroup); + this->mPushConstants = pushConstants; + this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants); } Algorithm::~Algorithm() { KP_LOG_DEBUG("Kompute Algorithm Destructor started"); + this->freeMemoryDestroyGPUResources(); +} + +void +Algorithm::rebuild( + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants, + const Constants& pushConstants) +{ + KP_LOG_DEBUG("Kompute Algorithm rebuild started"); + + // Descriptor pool is created first so if available then destroy all before rebuild + if (this->mFreeDescriptorPool) { + this->freeMemoryDestroyGPUResources(); + } + + this->createParameters(tensors); + this->createShaderModule(); + this->createPipeline(); +} + +void +Algorithm::freeMemoryDestroyGPUResources() { + if (!this->mDevice) { - KP_LOG_ERROR( - "Kompute Algorithm destructor reached with null Device pointer"); + KP_LOG_WARN( + "Kompute Algorithm destroy function reached with null Device pointer"); return; } if (this->mFreePipeline) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline"); if (!this->mPipeline) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline but it is null"); } this->mDevice->destroy( @@ -44,7 +70,7 @@ Algorithm::~Algorithm() if (this->mFreePipelineCache) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); if (!this->mPipelineCache) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline cache but it is null"); } this->mDevice->destroy( @@ -55,7 +81,7 @@ Algorithm::~Algorithm() if (this->mFreePipelineLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline 
layout but it is null"); } this->mDevice->destroy( @@ -66,7 +92,7 @@ Algorithm::~Algorithm() if (this->mFreeShaderModule) { KP_LOG_DEBUG("Kompute Algorithm Destroying shader module"); if (!this->mShaderModule) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy shader " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader " "module but it is null"); } this->mDevice->destroy( @@ -77,7 +103,7 @@ Algorithm::~Algorithm() if (this->mFreeDescriptorSet) { KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set"); if (!this->mDescriptorSet) { - KP_LOG_ERROR( + KP_LOG_WARN( "Kompute Algorithm Error requested to free descriptor set"); } this->mDevice->freeDescriptorSets( @@ -87,7 +113,7 @@ Algorithm::~Algorithm() if (this->mFreeDescriptorSetLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout"); if (!this->mDescriptorSetLayout) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "descriptor set layout but it is null"); } this->mDevice->destroy( @@ -98,7 +124,7 @@ Algorithm::~Algorithm() if (this->mFreeDescriptorPool) { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool"); if (!this->mDescriptorPool) { - KP_LOG_ERROR("Kompute Algorithm Error requested to destroy " + KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "descriptor pool but it is null"); } this->mDevice->destroy( @@ -108,27 +134,7 @@ Algorithm::~Algorithm() } void -Algorithm::init(const std::vector& shaderFileData, - std::vector> tensorParams) -{ - KP_LOG_DEBUG("Kompute Algorithm init started"); - - this->createParameters(tensorParams); - this->createShaderModule(shaderFileData); - - for (std::shared_ptr tensor : tensorParams) { - this->mSpecializationConstants.push_back(tensor->size()); - } - - this->createPipeline(); -} - -void -Algorithm::createDescriptorPool() -{} - -void -Algorithm::createParameters(std::vector>& tensorParams) +Algorithm::createParameters(const 
std::vector>& tensorParams) { KP_LOG_DEBUG("Kompute Algorithm createParameters started"); @@ -207,17 +213,17 @@ Algorithm::createParameters(std::vector>& tensorParams) } void -Algorithm::createShaderModule(const std::vector& shaderFileData) +Algorithm::createShaderModule() { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); vk::ShaderModuleCreateInfo shaderModuleInfo( vk::ShaderModuleCreateFlags(), - sizeof(uint32_t) * shaderFileData.size(), - shaderFileData.data()); + sizeof(uint32_t) * this->mSpirv.size(), + this->mSpirv.data()); KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}", - shaderFileData.size()); + this->mSpirv.size()); this->mFreeShaderModule = true; this->mShaderModule = std::make_shared(); this->mDevice->createShaderModule( @@ -300,21 +306,42 @@ Algorithm::createPipeline() } void -Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) +Algorithm::recordDispatch(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm calling record dispatch"); - this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, + commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline); - this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, + commandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, *this->mPipelineLayout, 0, // First set *this->mDescriptorSet, nullptr // Dispatcher ); - this->mCommandBuffer->dispatch(x, y, z); + commandBuffer->dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]); +} + +void +Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { + // The dispatch size is set up based on either explicitly provided template + // parameters or by default it would take the shape and size of the tensors + if (workgroup[0] > 0) { + // If at least the x value is provided we use mainly the parameters + // provided + this->mWorkgroup = { + workgroup[0], + workgroup[1] > 0 ? workgroup[1] : 1, + workgroup[2] > 0 ? 
workgroup[2] : 1 + }; + } else { + this->mWorkgroup = { minSize, 1, 1 }; + } + KP_LOG_INFO("Kompute OpAlgoCreate dispatch size X: {}, Y: {}, Z: {}", + this->mWorkgroup[0], + this->mWorkgroup[1], + this->mWorkgroup[2]); } } diff --git a/src/Manager.cpp b/src/Manager.cpp index 18b2bf289..7bd629165 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -61,21 +61,30 @@ Manager::~Manager() if (this->mManagedSequences.size()) { KP_LOG_DEBUG("Kompute Manager explicitly running destructor for " "managed sequences"); - for (const std::pair>& sqPair : - this->mManagedSequences) { - sqPair.second->freeMemoryDestroyGPUResources(); + for (const std::weak_ptr& weakSq : this->mManagedSequences) { + if (std::shared_ptr sq = weakSq.lock()) { + sq->freeMemoryDestroyGPUResources(); + } } this->mManagedSequences.clear(); } + if (this->mManagedAlgorithms.size()) { + KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); + for (const std::weak_ptr& weakAlgorithm : this->mManagedAlgorithms) { + if (std::shared_ptr algorithm = weakAlgorithm.lock()) { + algorithm->freeMemoryDestroyGPUResources(); + } + } + this->mManagedTensors.clear(); + } + if (this->mManagedTensors.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors"); - for (const std::shared_ptr& tensor : this->mManagedTensors) { - if (!tensor->isInit()) { - KP_LOG_ERROR("Kompute Manager attempted to free managed tensor " - "but not tensor is not initialised"); + for (const std::weak_ptr& weakTensor : this->mManagedTensors) { + if (std::shared_ptr tensor = weakTensor.lock()) { + tensor->freeMemoryDestroyGPUResources(); } - tensor->freeMemoryDestroyGPUResources(); } this->mManagedTensors.clear(); } @@ -111,32 +120,21 @@ Manager::~Manager() } std::shared_ptr -Manager::sequence(std::string sequenceName, uint32_t queueIndex) +Manager::sequence(uint32_t queueIndex) { KP_LOG_DEBUG("Kompute Manager sequence() with sequenceName: {} " "and queueIndex: {}", - sequenceName, queueIndex); - std::shared_ptr sq = nullptr; + 
std::shared_ptr sq = + std::make_shared(this->mPhysicalDevice, + this->mDevice, + this->mComputeQueues[queueIndex], + this->mComputeQueueFamilyIndices[queueIndex]); - std::unordered_map>::iterator found = - this->mManagedSequences.find(sequenceName); + this->mManagedSequences.insert(sq); - if (found == this->mManagedSequences.end()) { - std::shared_ptr sq = - std::make_shared(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]); - sq->init(); - - this->mManagedSequences.insert({ sequenceName, sq }); - - return sq; - } else { - return found->second; - } + return sq; } void @@ -334,13 +332,10 @@ Manager::tensor( Tensor::TensorTypes tensorType, bool syncDataToGPU) { - KP_LOG_DEBUG("Kompute Manager tensor triggered"); + KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - KP_LOG_DEBUG("Kompute Manager creating new tensor shared ptr"); - std::shared_ptr tensor = - std::make_shared(kp::Tensor(data, tensorType)); - - tensor->init(this->mPhysicalDevice, this->mDevice); + std::shared_ptr tensor = std::make_shared( + kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType)); if (syncDataToGPU) { this->evalOpDefault({ tensor }); @@ -349,6 +344,29 @@ Manager::tensor( return tensor; } +std::shared_ptr +Manager::algorithm( + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants, + const Constants& pushConstants) { + + KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); + + std::shared_ptr algorithm = std::make_shared( + kp::Algorithm( + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants)); + + this->mManagedAlgorithms.insert(algorithm); + + return algorithm; +} void Manager::rebuild(std::vector> tensors, diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp deleted file mode 100644 index 71b61beb5..000000000 --- a/src/OpAlgoBase.cpp +++ /dev/null @@ -1,176 +0,0 @@ 
-#pragma once - -#include "kompute/operations/OpAlgoBase.hpp" - -namespace kp { - -OpAlgoBase::OpAlgoBase() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase constructor base"); -} - -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const Workgroup& komputeWorkgroup, - const Constants& specializationConstants) - : OpBase(physicalDevice, device, commandBuffer, tensors) -{ - KP_LOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", - tensors.size()); - - // The dispatch size is set up based on either explicitly provided template - // parameters or by default it would take the shape and size of the tensors - if (komputeWorkgroup[0] > 0) { - // If at least the x value is provided we use mainly the parameters - // provided - this->mKomputeWorkgroup = { - komputeWorkgroup[0], - komputeWorkgroup[1] > 0 ? komputeWorkgroup[1] : 1, - komputeWorkgroup[2] > 0 ? komputeWorkgroup[2] : 1 - }; - } else { - this->mKomputeWorkgroup = { tensors[0]->size(), 1, 1 }; - } - KP_LOG_INFO("Kompute OpAlgoBase dispatch size X: {}, Y: {}, Z: {}", - this->mKomputeWorkgroup[0], - this->mKomputeWorkgroup[1], - this->mKomputeWorkgroup[2]); - - this->mAlgorithm = std::make_shared(device, commandBuffer, specializationConstants); -} - -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath, - const Workgroup& komputeWorkgroup, - const Constants& specializationConstants) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants) -{ - KP_LOG_DEBUG( - "Kompute OpAlgoBase shaderFilePath constructo with shaderfile path: {}", - shaderFilePath); - - this->mShaderFilePath = shaderFilePath; -} - -OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& 
shaderDataRaw, - const Workgroup& komputeWorkgroup, - const Constants& specializationConstants) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup, specializationConstants) -{ - KP_LOG_DEBUG("Kompute OpAlgoBase shaderFilePath constructo with shader raw " - "data length: {}", - shaderDataRaw.size()); - - this->mShaderDataRaw = shaderDataRaw; -} - -OpAlgoBase::~OpAlgoBase() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase destructor started"); -} - -void -OpAlgoBase::init() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpAlgoBase called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (!tensor->isInit()) { - throw std::runtime_error( - "Kompute OpAlgoBase validation failed; all tensor parameters " - "must be initialised."); - } - } - - KP_LOG_DEBUG("Kompute OpAlgoBase fetching spirv data"); - - std::vector shaderFileData = this->fetchSpirvBinaryData(); - - KP_LOG_DEBUG("Kompute OpAlgoBase Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -void -OpAlgoBase::record() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase record called"); - - // Barrier to ensure the data is finished writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { - tensor->recordBufferMemoryBarrier( - this->mCommandBuffer, - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - } - - this->mAlgorithm->recordDispatch(this->mKomputeWorkgroup[0], - this->mKomputeWorkgroup[1], - this->mKomputeWorkgroup[2]); -} - -void -OpAlgoBase::preEval() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase preEval called"); -} - -void -OpAlgoBase::postEval() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase postSubmit called"); -} - -std::vector -OpAlgoBase::fetchSpirvBinaryData() -{ - KP_LOG_DEBUG("Kompute OpAlgoBase Running fetchSpirvBinaryData"); - - 
if (this->mShaderFilePath.size()) { - KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from file path"); - - std::ifstream fileStream(this->mShaderFilePath, - std::ios::binary | std::ios::in | - std::ios::ate); - - if (!fileStream.good()) { - throw std::runtime_error("Error reading file: " + - this->mShaderFilePath); - } - - size_t shaderFileSize = fileStream.tellg(); - fileStream.seekg(0, std::ios::beg); - char* shaderDataRaw = new char[shaderFileSize]; - fileStream.read(shaderDataRaw, shaderFileSize); - fileStream.close(); - - KP_LOG_WARN("Kompute OpAlgoBase fetched {} bytes", shaderFileSize); - - return std::vector((uint32_t*)shaderDataRaw, (uint32_t*)(shaderDataRaw + shaderFileSize)); - } else if (this->mShaderDataRaw.size()) { - KP_LOG_DEBUG("Kompute OpAlgoBase Reading data from data provided"); - return this->mShaderDataRaw; - } else { - throw std::runtime_error( - "Kompute OpAlgoBase Error reached fetchSpirvBinaryData but neither " - "filepath nor data provided"); - } -} - -} diff --git a/src/OpAlgoCreate.cpp b/src/OpAlgoCreate.cpp new file mode 100644 index 000000000..008cf9bbe --- /dev/null +++ b/src/OpAlgoCreate.cpp @@ -0,0 +1,51 @@ +#pragma once + +#include "kompute/operations/OpAlgoCreate.hpp" + +namespace kp { + +OpAlgoCreate::OpAlgoCreate(std::vector> tensors, + std::shared_ptr algorithm) + : OpBase(tensors, algorithm) +{ + KP_LOG_DEBUG("Kompute OpAlgoCreate constructor"); + + this->mManagesAlgorithm = true; + this->mManagesTensors = false; +} + +OpAlgoCreate::~OpAlgoCreate() +{ + KP_LOG_DEBUG("Kompute OpAlgoCreate destructor started"); +} + +void +OpAlgoCreate::init( + std::shared_ptr physicalDevice, + std::shared_ptr device) { + + KP_LOG_DEBUG("Kompute OpAlgoCreate init started"); + + // Explicitly calling top level function to create algo + OpBase::init(physicalDevice, device); +} + +void +OpAlgoCreate::record(std::shared_ptr commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpAlgoCreate record called"); +} + +void +OpAlgoCreate::preEval() +{ + 
KP_LOG_DEBUG("Kompute OpAlgoCreate preEval called"); +} + +void +OpAlgoCreate::postEval() +{ + KP_LOG_DEBUG("Kompute OpAlgoCreate postSubmit called"); +} + +} diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp new file mode 100644 index 000000000..25d2ba519 --- /dev/null +++ b/src/OpAlgoDispatch.cpp @@ -0,0 +1,59 @@ +#pragma once + +#include "kompute/operations/OpAlgoDispatch.hpp" + +namespace kp { + +OpAlgoDispatch::OpAlgoDispatch(std::vector> tensors, + std::shared_ptr algorithm) + : OpBase(tensors, algorithm) +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + + this->mManagesAlgorithm = false; + this->mManagesTensors = false; +} + +OpAlgoDispatch::~OpAlgoDispatch() +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); +} + +void +OpAlgoDispatch::init(std::shared_ptr physicalDevice, + std::shared_ptr device) +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch init called"); +} + +void +OpAlgoDispatch::record(std::shared_ptr commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch record called"); + + // Barrier to ensure the data is finished writing to buffer memory + for (std::shared_ptr tensor : this->mTensors) { + tensor->recordBufferMemoryBarrier( + commandBuffer, + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + } + + this->mAlgorithm->recordDispatch(commandBuffer); +} + +void +OpAlgoDispatch::preEval() +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called"); +} + +void +OpAlgoDispatch::postEval() +{ + KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called"); +} + +} diff --git a/src/OpAlgoLhsRhsOut.cpp b/src/OpAlgoLhsRhsOut.cpp index c93e5c55b..89eb15c60 100644 --- a/src/OpAlgoLhsRhsOut.cpp +++ b/src/OpAlgoLhsRhsOut.cpp @@ -10,15 +10,12 @@ OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() } OpAlgoLhsRhsOut::OpAlgoLhsRhsOut( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - const 
Workgroup& komputeWorkgroup) + std::vector>& tensors, + std::shared_ptr algorithm) // The inheritance is initialised with the copyOutputData to false given that // this depencendant class handles the transfer of data via staging buffers in // a granular way. - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, komputeWorkgroup) + : OpAlgoCreate(tensors, algorithm) { KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); } @@ -29,7 +26,8 @@ OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() } void -OpAlgoLhsRhsOut::init() +OpAlgoLhsRhsOut::init(std::shared_ptr physicalDevice, + std::shared_ptr device) { KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); @@ -70,12 +68,10 @@ OpAlgoLhsRhsOut::init() std::vector shaderFileData = this->fetchSpirvBinaryData(); KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); } void -OpAlgoLhsRhsOut::record() +OpAlgoLhsRhsOut::record(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 98450dc82..8f88eeb65 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -3,18 +3,13 @@ namespace kp { -OpTensorCopy::OpTensorCopy() -{ - KP_LOG_DEBUG("Kompute OpTensorCopy constructor base"); -} - -OpTensorCopy::OpTensorCopy(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors) +OpTensorCopy::OpTensorCopy(std::vector> tensors) + : OpBase(tensors, nullptr) { KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params"); + + this->mManagesTensors = false; + this->mManagesAlgorithm = false; } OpTensorCopy::~OpTensorCopy() @@ -23,7 +18,8 @@ OpTensorCopy::~OpTensorCopy() } void -OpTensorCopy::init() +OpTensorCopy::init(std::shared_ptr physicalDevice, + std::shared_ptr device) { KP_LOG_DEBUG("Kompute OpTensorCopy init called"); @@ -46,14 +42,14 @@ 
OpTensorCopy::init() } void -OpTensorCopy::record() +OpTensorCopy::record(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorCopy record called"); // We iterate from the second tensor onwards and record a copy to all for (size_t i = 1; i < this->mTensors.size(); i++) { this->mTensors[i]->recordCopyFrom( - this->mCommandBuffer, this->mTensors[0], false); + commandBuffer, this->mTensors[0], false); } } diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp new file mode 100644 index 000000000..a343f1510 --- /dev/null +++ b/src/OpTensorCreate.cpp @@ -0,0 +1,46 @@ + +#include "kompute/operations/OpTensorCreate.hpp" + +namespace kp { + +OpTensorCreate::OpTensorCreate( + std::vector>& tensors) + : OpBase(tensors, nullptr) +{ + KP_LOG_DEBUG("Compute OpTensorCreate constructor with params"); + this->mManagesTensors = true; +} + +OpTensorCreate::~OpTensorCreate() +{ + KP_LOG_DEBUG("Kompute OpTensorCreate destructor started"); +} + +void +OpTensorCreate::init(std::shared_ptr physicalDevice, + std::shared_ptr device) +{ + KP_LOG_DEBUG("Kompute OpTensorCreate init called"); + + OpBase::init(physicalDevice, device); +} + +void +OpTensorCreate::record(std::shared_ptr commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpTensorCreate record called"); +} + +void +OpTensorCreate::preEval() +{ + KP_LOG_DEBUG("Kompute OpTensorCreate preEval called"); +} + +void +OpTensorCreate::postEval() +{ + KP_LOG_DEBUG("Kompute OpTensorCreate postEval called"); +} + +} diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index fdd153898..872f82365 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -1,21 +1,11 @@ -#include "kompute/Tensor.hpp" - #include "kompute/operations/OpTensorSyncDevice.hpp" namespace kp { -OpTensorSyncDevice::OpTensorSyncDevice() -{ - KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor base"); -} - OpTensorSyncDevice::OpTensorSyncDevice( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr 
commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors) + : OpBase(tensors, nullptr) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); } @@ -26,7 +16,8 @@ OpTensorSyncDevice::~OpTensorSyncDevice() } void -OpTensorSyncDevice::init() +OpTensorSyncDevice::init(std::shared_ptr physicalDevice, + std::shared_ptr device) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice init called"); @@ -50,14 +41,14 @@ OpTensorSyncDevice::init() } void -OpTensorSyncDevice::record() +OpTensorSyncDevice::record(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { this->mTensors[i]->recordCopyFromStagingToDevice( - this->mCommandBuffer, false); + commandBuffer, false); } } } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index ccf8e70ce..fd98b092d 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -5,19 +5,14 @@ namespace kp { -OpTensorSyncLocal::OpTensorSyncLocal() -{ - KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor base"); -} - OpTensorSyncLocal::OpTensorSyncLocal( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors) + : OpBase(tensors, nullptr) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); + + this->mManagesTensors = false; + this->mManagesAlgorithm = false; } OpTensorSyncLocal::~OpTensorSyncLocal() @@ -26,7 +21,8 @@ OpTensorSyncLocal::~OpTensorSyncLocal() } void -OpTensorSyncLocal::init() +OpTensorSyncLocal::init(std::shared_ptr physicalDevice, + std::shared_ptr device) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal init called"); @@ -40,24 +36,18 @@ OpTensorSyncLocal::init() throw std::runtime_error( "Kompute OpTensorSyncLocal: Tensor has not been initialized"); } - if (tensor->tensorType() 
== Tensor::TensorTypes::eStorage) { - KP_LOG_WARN( - "Kompute OpTensorSyncLocal tensor parameter is of type " - "TensorTypes::eStorage and hence cannot be used to receive or " - "pass data."); - } } } void -OpTensorSyncLocal::record() +OpTensorSyncLocal::record(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { this->mTensors[i]->recordCopyFromDeviceToStaging( - this->mCommandBuffer, true); + commandBuffer, true); } } } diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 19fdf11e8..4f6596efb 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -3,12 +3,6 @@ namespace kp { -Sequence::Sequence() -{ - KP_LOG_DEBUG("Kompute Sequence base constructor"); - this->mIsInit = false; -} - Sequence::Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, @@ -20,28 +14,16 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->mDevice = device; this->mComputeQueue = computeQueue; this->mQueueIndex = queueIndex; - this->mIsInit = false; + + this->createCommandPool(); + this->createCommandBuffer(); } Sequence::~Sequence() { KP_LOG_DEBUG("Kompute Sequence Destructor started"); - if (!this->mIsInit) { - KP_LOG_INFO("Kompute Sequence destructor called but sequence is not " - "initialized so no need to removing GPU resources."); - return; - } else { - this->freeMemoryDestroyGPUResources(); - } -} - -void -Sequence::init() -{ - this->createCommandPool(); - this->createCommandBuffer(); - this->mIsInit = true; + this->freeMemoryDestroyGPUResources(); } bool @@ -194,28 +176,14 @@ Sequence::isRecording() return this->mRecording; } -bool -Sequence::isInit() -{ - return this->mIsInit; -} - void Sequence::freeMemoryDestroyGPUResources() { KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called"); - if (!this->mIsInit) { - KP_LOG_ERROR("Kompute Sequence 
freeMemoryDestroyGPUResources called " - "but Sequence is not initialized so there's no relevant " - "GPU resources."); - return; - } - if (!this->mDevice) { KP_LOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called " "with null Device pointer"); - this->mIsInit = false; return; } @@ -225,7 +193,6 @@ Sequence::freeMemoryDestroyGPUResources() KP_LOG_ERROR( "Kompute Sequence freeMemoryDestroyGPUResources called with null " "CommandPool pointer"); - this->mIsInit = false; return; } this->mDevice->freeCommandBuffers( @@ -239,7 +206,6 @@ Sequence::freeMemoryDestroyGPUResources() KP_LOG_ERROR( "Kompute Sequence freeMemoryDestroyGPUResources called with null " "CommandPool pointer"); - this->mIsInit = false; return; } this->mDevice->destroy( @@ -253,7 +219,6 @@ Sequence::freeMemoryDestroyGPUResources() this->mOperations.clear(); } - this->mIsInit = false; } void diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 16b7f9e80..1ae4662f8 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -3,23 +3,19 @@ namespace kp { -Tensor::Tensor() +Tensor::Tensor(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType) { - KP_LOG_DEBUG("Kompute Tensor base constructor"); - this->mTensorType = TensorTypes::eDevice; -} - -Tensor::Tensor(const std::vector& data, TensorTypes tensorType) -{ -#if DEBUG KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", data.size(), tensorType); -#endif - this->mData = data; - this->mShape = { static_cast(data.size()) }; - this->mTensorType = tensorType; + this->mPhysicalDevice = physicalDevice; + this->mDevice = device; + + this->rebuild(data, tensorType); } Tensor::~Tensor() @@ -27,25 +23,25 @@ Tensor::~Tensor() KP_LOG_DEBUG("Kompute Tensor destructor started. 
Type: {}", this->tensorType()); - if (this->isInit()) { - this->freeMemoryDestroyGPUResources(); - } + this->freeMemoryDestroyGPUResources(); KP_LOG_DEBUG("Kompute Tensor destructor success"); } void -Tensor::init(std::shared_ptr physicalDevice, - std::shared_ptr device) +Tensor::rebuild(const std::vector& data, + TensorTypes tensorType) { - KP_LOG_DEBUG("Kompute Tensor running init with Vulkan params and num data " - "elementS: {}", - this->mData.size()); + KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", + data.size()); - this->mPhysicalDevice = physicalDevice; - this->mDevice = device; + this->mData = data; + this->mTensorType = tensorType; - this->mIsInit = true; + if (this->mPrimaryBuffer || this->mPrimaryMemory) { + KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild"); + this->freeMemoryDestroyGPUResources(); + } this->allocateMemoryCreateGPUResources(); } @@ -71,13 +67,7 @@ Tensor::memorySize() uint32_t Tensor::size() { - return this->mShape[0]; -} - -std::array -Tensor::shape() -{ - return this->mShape; + return static_cast(this->mData.size()); } Tensor::TensorTypes @@ -86,12 +76,6 @@ Tensor::tensorType() return this->mTensorType; } -bool -Tensor::isInit() -{ - return this->mIsInit && this->mPrimaryBuffer && this->mPrimaryMemory; -} - void Tensor::setData(const std::vector& data) { @@ -166,11 +150,6 @@ Tensor::copyBuffer(std::shared_ptr commandBuffer, bool createBarrier) { - if (!this->mIsInit) { - throw std::runtime_error( - "Kompute Tensor attempted to run copyBuffer without init"); - } - commandBuffer->copyBuffer(*bufferFrom, *bufferTo, copyRegion); if (createBarrier) { @@ -344,11 +323,6 @@ Tensor::allocateMemoryCreateGPUResources() { KP_LOG_DEBUG("Kompute Tensor creating buffer"); - if (!this->mIsInit) { - throw std::runtime_error( - "Kompute Tensor attempted to run createBuffer without init"); - } - if (!this->mPhysicalDevice) { throw std::runtime_error("Kompute Tensor phyisical device is null"); } @@ -457,9 +431,7 @@ 
Tensor::allocateBindMemory(std::shared_ptr buffer, void Tensor::freeMemoryDestroyGPUResources() { - KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources"); - - this->mIsInit = false; + KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()"); if (!this->mDevice) { KP_LOG_ERROR( @@ -519,7 +491,7 @@ Tensor::freeMemoryDestroyGPUResources() } } - KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources"); + KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()"); } } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index a859b79d3..4016c5efb 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -13,11 +13,6 @@ namespace kp { class Algorithm { public: - /** - Base constructor for Algorithm. Should not be used unless explicit - intended. - */ - Algorithm(); /** * Default constructor for Algorithm @@ -26,9 +21,13 @@ public: * @param commandBuffer The vulkan command buffer to bind the pipeline and * shaders */ - Algorithm(std::shared_ptr device, - std::shared_ptr commandBuffer, - const Constants& specializationConstants = {}); + Algorithm( + std::shared_ptr device, + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); /** * Initialiser for the shader data provided to the algorithm as well as @@ -39,8 +38,16 @@ public: * @specalizationInstalces The specialization parameters to pass to the function * processing */ - void init(const std::vector& shaderFileData, - std::vector> tensorParams); + void rebuild( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); + + bool isInit(); + + void freeMemoryDestroyGPUResources(); /** * Destructor for Algorithm which is responsible for 
freeing and desroying @@ -56,12 +63,13 @@ public: * @param y Layout Y dispatch value * @param z Layout Z dispatch value */ - void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1); + void recordDispatch(std::shared_ptr commandBuffer); + + void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; - std::shared_ptr mCommandBuffer; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; @@ -80,15 +88,19 @@ private: bool mFreePipeline = false; // -------------- ALWAYS OWNED RESOURCES + std::vector mSpirv; Constants mSpecializationConstants; + Constants mPushConstants; + Workgroup mWorkgroup; + + bool mIsInit; // Create util functions - void createShaderModule(const std::vector& shaderFileData); + void createShaderModule(); void createPipeline(); // Parameters - void createParameters(std::vector>& tensorParams); - void createDescriptorPool(); + void createParameters(const std::vector>& tensorParams); }; } // End namespace kp diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 1ca302b3c..3615d74c0 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -67,9 +67,7 @@ class Manager * @param queueIndex The queue to use from the available queues * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr sequence( - std::string sequenceName = KP_DEFAULT_SESSION, - uint32_t queueIndex = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -228,6 +226,13 @@ class Manager Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true); + std::shared_ptr algorithm( + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}, + const Constants& pushConstants = {}); + /** * Function that simplifies the common workflow of tensor initialisation. It * will take the constructor parameters for a Tensor and will will us it to @@ -312,10 +317,10 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES - std::set> mManagedTensors; - - std::unordered_map> - mManagedSequences; + std::set> mManagedTensors; + std::set> mManagedSequences; + std::set> mManagedAlgorithms; + //std::unique_ptr mDefaultSequence; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index d96910894..eeecd0a04 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -12,11 +12,6 @@ namespace kp { class Sequence { public: - /** - * Base constructor for Sequence. Should not be used unless explicit - * intended. - */ - Sequence(); /** * Main constructor for sequence which requires core vulkan components to * generate all dependent resources. @@ -36,12 +31,6 @@ class Sequence */ ~Sequence(); - /** - * Initialises sequence including the creation of the command pool and the - * command buffer. - */ - void init(); - /** * Begins recording commands for commands to be submitted into the command * buffer. @@ -99,13 +88,6 @@ class Sequence */ bool isRunning(); - /** - * Returns true if the sequence has been successfully initialised. - * - * @return Boolean stating if sequence has been initialised. - */ - bool isInit(); - /** * Destroys and frees the GPU resources which include the buffer and memory * and sets the sequence as init=False. 
@@ -179,7 +161,6 @@ class Sequence std::vector> mOperations; // State - bool mIsInit = false; bool mRecording = false; bool mIsRunning = false; diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 5d9fb07df..40adcc700 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -2,8 +2,6 @@ #include "kompute/Core.hpp" -#define KP_MAX_DIM_SIZE 1 - namespace kp { /** @@ -30,11 +28,6 @@ class Tensor eStorage = 2, ///< Type is Device memory (only) }; - /** - * Base constructor, should not be used unless explicitly intended. - */ - Tensor(); - /** * Default constructor with data provided which would be used to create the * respective vulkan buffer and memory. @@ -43,8 +36,10 @@ class Tensor * tensor * @param tensorType Type for the tensor which is of type TensorTypes */ - Tensor(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + Tensor(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they @@ -58,8 +53,8 @@ class Tensor * would only be created for the tensors of type TensorType::eDevice as * otherwise there is no need to copy from host memory. */ - void init(std::shared_ptr physicalDevice, - std::shared_ptr device); + void rebuild(const std::vector& data, + TensorTypes tensorType = TensorTypes::eDevice); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -91,26 +86,13 @@ class Tensor * @return Unsigned integer representing the total number of elements */ uint32_t size(); - /** - * Returns the shape of the tensor, which includes the number of dimensions - * and the size per dimension. - * - * @return Array containing the sizes for each dimension. Zero means - * respective dimension is not active. 
- */ - std::array shape(); + /** * Retrieve the tensor type of the Tensor * * @return Tensor type of tensor */ TensorTypes tensorType(); - /** - * Returns true if the tensor initialisation function has been carried out - * successful, which would mean that the buffer and memory will have been - * provisioned. - */ - bool isInit(); /** * Sets / resets the vector data of the tensor. This function does not @@ -214,9 +196,6 @@ class Tensor TensorTypes mTensorType = TensorTypes::eDevice; - std::array mShape; - bool mIsInit = false; - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); diff --git a/src/include/kompute/operations/OpAlgoCreate.hpp b/src/include/kompute/operations/OpAlgoCreate.hpp new file mode 100644 index 000000000..3f5c859a2 --- /dev/null +++ b/src/include/kompute/operations/OpAlgoCreate.hpp @@ -0,0 +1,77 @@ +#pragma once + +#include + +#include "kompute/Core.hpp" + +#include "kompute/shaders/shaderopmult.hpp" + +#include "kompute/Algorithm.hpp" +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +/** + * Operation that provides a general abstraction that simplifies the use of + * algorithm and parameter components which can be used with shaders. + * By default it enables the user to provide a dynamic number of tensors + * which are then passed as inputs. + */ +class OpAlgoCreate : public OpBase +{ + public: + + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. 
+ * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) + * @param komputeWorkgroup Optional parameter to specify the layout for processing + */ + OpAlgoCreate(std::vector> tensors, + std::shared_ptr algorithm); + + /** + * Default destructor, which is in charge of destroying the algorithm + * components but does not destroy the underlying tensors + */ + virtual ~OpAlgoCreate() override; + + + virtual void init( + std::shared_ptr physicalDevice, + std::shared_ptr device) override; + + /** + * This records the commands that are to be sent to the GPU. This includes + * the barriers that ensure the memory has been copied before going in and + * out of the shader, as well as the dispatch operation that sends the + * shader processing to the gpu. This function also records the GPU memory + * copy of the output data for the staging buffer so it can be read by the + * host. + */ + virtual void record(std::shared_ptr commandBuffer) override; + + + /** + * Does not perform any preEval commands. + */ + virtual void preEval() override; + + /** + * Executes after the recorded commands are submitted, and performs a copy + * of the GPU Device memory into the staging buffer so the output data can + * be retrieved. 
+ */ + virtual void postEval() override; +}; + +} // End namespace kp + diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp similarity index 64% rename from src/include/kompute/operations/OpAlgoBase.hpp rename to src/include/kompute/operations/OpAlgoDispatch.hpp index 49d0569b7..7763aa9b9 100644 --- a/src/include/kompute/operations/OpAlgoBase.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -1,14 +1,8 @@ #pragma once -#include - #include "kompute/Core.hpp" - -#include "kompute/shaders/shaderopmult.hpp" - #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" - #include "kompute/operations/OpBase.hpp" namespace kp { @@ -19,15 +13,10 @@ namespace kp { * By default it enables the user to provide a dynamic number of tensors * which are then passed as inputs. */ -class OpAlgoBase : public OpBase +class OpAlgoDispatch : public OpBase { public: - /** - * Base constructor, should not be used unless explicitly intended. 
- */ - OpAlgoBase(); - /** * Default constructor with parameters that provides the bare minimum * requirements for the operations to be able to create and manage their @@ -40,12 +29,8 @@ class OpAlgoBase : public OpBase * @param shaderFilePath Optional parameter to specify the shader to load (either in spirv or raw format) * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); + OpAlgoDispatch(std::vector> tensors, + std::shared_ptr algorithm); /** * Constructor that enables a file to be passed to the operation with @@ -59,13 +44,9 @@ class OpAlgoBase : public OpBase * @param shaderFilePath Parameter to specify the shader to load (either in spirv or raw format) * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - std::string shaderFilePath, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); + OpAlgoDispatch(std::vector>& tensors, + std::shared_ptr& algorithm, + std::string shaderFilePath); /** * Constructor that enables raw shader data to be passed to the main operation @@ -78,19 +59,15 @@ class OpAlgoBase : public OpBase * @param shaderDataRaw Optional parameter to specify the shader data either in binary or raw form * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors, - const std::vector& shaderDataRaw, - const Workgroup& komputeWorkgroup = {}, - const Constants& specializationConstants = {}); + OpAlgoDispatch(std::vector>& tensors, + std::shared_ptr& 
algorithm, + const std::vector& shaderDataRaw); /** * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - virtual ~OpAlgoBase() override; + virtual ~OpAlgoDispatch() override; /** * The init function is responsible for the initialisation of the algorithm @@ -98,7 +75,8 @@ class OpAlgoBase : public OpBase * on the options provided. Further dependent classes can perform more * specific checks such as ensuring tensors provided are initialised, etc. */ - virtual void init() override; + virtual void init(std::shared_ptr physicalDevice, + std::shared_ptr device) override; /** * This records the commands that are to be sent to the GPU. This includes @@ -108,7 +86,7 @@ class OpAlgoBase : public OpBase * copy of the output data for the staging buffer so it can be read by the * host. */ - virtual void record() override; + virtual void record(std::shared_ptr commandBuffer) override; /** @@ -123,21 +101,6 @@ class OpAlgoBase : public OpBase */ virtual void postEval() override; - protected: - // -------------- NEVER OWNED RESOURCES - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mAlgorithm; - bool mFreeAlgorithm = false; - - // -------------- ALWAYS OWNED RESOURCES - - Workgroup mKomputeWorkgroup; - - std::string mShaderFilePath; ///< Optional member variable which can be provided for the OpAlgoBase to find the data automatically and load for processing - std::vector mShaderDataRaw; ///< Optional member variable which can be provided to contain either the raw shader content or the spirv binary content - - virtual std::vector fetchSpirvBinaryData(); }; } // End namespace kp diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp index c1223e738..65cdf14a1 100644 --- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -7,7 +7,7 @@ #include "kompute/Algorithm.hpp" 
#include "kompute/Tensor.hpp" -#include "kompute/operations/OpAlgoBase.hpp" +#include "kompute/operations/OpAlgoCreate.hpp" namespace kp { @@ -16,13 +16,9 @@ namespace kp { * right hand and left hand side datapoints together with a single output. * The expected data passed is two input tensors and one output tensor. */ -class OpAlgoLhsRhsOut : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoCreate { public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpAlgoLhsRhsOut(); /** * Default constructor with parameters that provides the bare minimum @@ -36,11 +32,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * @param freeTensors Whether operation manages the memory of the Tensors * @param komputeWorkgroup Optional parameter to specify the layout for processing */ - OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors, - const Workgroup& komputeWorkgroup = {}); + OpAlgoLhsRhsOut(std::vector>& tensors, + std::shared_ptr algorithm); /** * Default destructor, which is in charge of destroying the algorithm @@ -54,7 +47,8 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * tensors, and creates the algorithm component which processes the * computation. */ - virtual void init() override; + virtual void init(std::shared_ptr physicalDevice, + std::shared_ptr device) override; /** * This records the commands that are to be sent to the GPU. This includes @@ -64,7 +58,7 @@ class OpAlgoLhsRhsOut : public OpAlgoBase * copy of the output data for the staging buffer so it can be read by the * host. 
*/ - virtual void record() override; + virtual void record(std::shared_ptr commandBuffer) override; /** * Executes after the recorded commands are submitted, and performs a copy diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index bc9ee59a6..f54d01390 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -3,6 +3,7 @@ #include "kompute/Core.hpp" #include "kompute/Tensor.hpp" +#include "kompute/Algorithm.hpp" namespace kp { @@ -17,10 +18,6 @@ namespace kp { class OpBase { public: - /** - * Base constructor, should not be used unless explicitly intended. - */ - OpBase() { KP_LOG_DEBUG("Compute OpBase base constructor"); } /** * Default constructor with parameters that provides the bare minimum @@ -32,17 +29,13 @@ class OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation */ - OpBase(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) + OpBase(std::vector>& tensors, + std::shared_ptr algorithm) { KP_LOG_DEBUG("Compute OpBase constructor with params"); - - this->mPhysicalDevice = physicalDevice; - this->mDevice = device; - this->mCommandBuffer = commandBuffer; this->mTensors = tensors; + this->mAlgorithm = algorithm; + this->mIsInit = false; } /** @@ -53,37 +46,89 @@ class OpBase virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); + this->destroy(); + } - if (!this->mDevice) { - KP_LOG_WARN("Kompute OpBase destructor called with empty device"); - return; - } + virtual std::shared_ptr algorithm() { + return this->mAlgorithm; + } - if (this->mFreeTensors) { - KP_LOG_DEBUG("Kompute OpBase freeing tensors"); - for (std::shared_ptr tensor : this->mTensors) { - if (tensor && tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } else { - KP_LOG_WARN("Kompute OpBase expected to free " - "tensor 
but has already been freed."); - } - } - } + virtual std::vector> tensors() { + return this->mTensors; + } + + virtual bool isInit() { + return this->mIsInit; } /** * The init function is responsible for setting up all the resources and * should be called after the Operation has been created. */ - virtual void init() = 0; + // TODO: Potentially remove physicalDevice in favour of memoryProperties (for tensor) + virtual void init( + std::shared_ptr physicalDevice, + std::shared_ptr device) { + + if (this->mTensors.size() < 1) { + throw std::runtime_error("Kompute OpBase init called with 0 tensors"); + } + + if (this->mManagesTensors) { + for (std::shared_ptr tensor : this->mTensors) { + if (tensor->isInit()) { + // TODO: Evaluate whether throwing runtime error or just writing error log + throw std::runtime_error( + "Kompute OpTensorCreate: Tensor has already been initialized"); + } + else { + tensor->init(physicalDevice, device); + } + } + } + + if (this->mManagesAlgorithm) { + this->mAlgorithm->init(device, this->mTensors); + } + } + + virtual void destroy() { + if (!this->mIsInit) { + KP_LOG_WARN("Kompute OpBase destroy called but not initialised"); + } + + if (this->mManagesTensors) { + for (const std::shared_ptr& tensor : this->mTensors) { + if (!tensor->isInit()) { + KP_LOG_WARN("Kompute OpBase attempted to free managed tensor " + "but tensor is not initialised"); + } else { + KP_LOG_DEBUG("Kompute OpBase freeing tensor"); + tensor->freeMemoryDestroyGPUResources(); + } + } + this->mTensors.clear(); + } + + if (this->mManagesAlgorithm) { + if (this->mAlgorithm && this->mAlgorithm->isInit()) { + KP_LOG_DEBUG("Kompute OpBase freeing tensor"); + this->mAlgorithm->freeMemoryDestroyGPUResources(); + } else { + KP_LOG_WARN("Kompute OpBase attempted to free managed algorithm" + "but algorithm is not initialised"); + } + } + + this->mIsInit = false; + } /** * The record function is intended to only send a record command or run * commands that are expected to record 
operations that are to be submitted * as a batch into the GPU. */ - virtual void record() = 0; + virtual void record(std::shared_ptr commandBuffer) = 0; /** * Pre eval is called before the Sequence has called eval and submitted the commands to @@ -106,19 +151,14 @@ class OpBase virtual void postEval() = 0; protected: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr - mPhysicalDevice; ///< Vulkan Physical Device - std::shared_ptr mDevice; ///< Vulkan Logical Device - std::shared_ptr - mCommandBuffer; ///< Vulkan Command Buffer - // -------------- OPTIONALLY OWNED RESOURCES - std::vector> - mTensors; ///< Tensors referenced by operation that can be managed - ///< optionally by operation - bool mFreeTensors = false; ///< Explicit boolean that specifies whether the - ///< tensors are freed (if they are managed) + std::vector> mTensors; + bool mManagesTensors = false; + std::shared_ptr mAlgorithm; + bool mManagesAlgorithm = false; + + // -------------- ALWAYS OWNED RESOURCES + bool mIsInit; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index a921fb2d2..485210f0a 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -11,7 +11,7 @@ #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" -#include "kompute/operations/OpAlgoBase.hpp" +#include "kompute/operations/OpAlgoCreate.hpp" namespace kp { @@ -19,7 +19,7 @@ namespace kp { * Operation that performs multiplication on two tensors and outpus on third * tensor. 
*/ -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoCreate { public: /** @@ -45,7 +45,7 @@ class OpMult : public OpAlgoBase std::shared_ptr commandBuffer, std::vector> tensors, const Workgroup& komputeWorkgroup = {}) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) + : OpAlgoCreate(physicalDevice, device, commandBuffer, tensors, "", komputeWorkgroup) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/src/include/kompute/operations/OpTensorCopy.hpp b/src/include/kompute/operations/OpTensorCopy.hpp index 7edbaeb66..d35139e8c 100644 --- a/src/include/kompute/operations/OpTensorCopy.hpp +++ b/src/include/kompute/operations/OpTensorCopy.hpp @@ -14,8 +14,6 @@ namespace kp { class OpTensorCopy : public OpBase { public: - OpTensorCopy(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. * @@ -24,10 +22,7 @@ class OpTensorCopy : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorCopy(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorCopy(std::vector> tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. @@ -37,12 +32,13 @@ class OpTensorCopy : public OpBase /** * Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage. */ - void init() override; + void init(std::shared_ptr physicalDevice, + std::shared_ptr device) override; /** * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier. 
*/ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp new file mode 100644 index 000000000..b4ac80862 --- /dev/null +++ b/src/include/kompute/operations/OpTensorCreate.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include "kompute/Core.hpp" + +#include "kompute/operations/OpBase.hpp" +#include "kompute/Tensor.hpp" +#include "kompute/Algorithm.hpp" + +namespace kp { + +/** + * Base Operation which provides the high level interface that Kompute + * operations implement in order to perform a set of actions in the GPU. + * + * Operations can perform actions on tensors, and optionally can also own an + * Algorithm with respective parameters. kp::Operations with kp::Algorithms + * would inherit from kp::OpBaseAlgo. + */ +class OpTensorCreate : public OpBase +{ + public: + + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + */ + OpTensorCreate(std::vector>& tensors); + + /** + * Default destructor for OpTensorCreate class. This OpTensorCreate destructor class should + * always be called to destroy and free owned resources unless it is + * intended to destroy the resources in the parent class. + */ + virtual ~OpTensorCreate() override; + + /** + * The init function is responsible for setting up all the resources and + * should be called after the Operation has been created. 
+ */ + virtual void init( + std::shared_ptr physicalDevice, + std::shared_ptr device) override; + + /** + * Record runs the core actions to create the tensors. For device tensors + * it records a copyCommand to move the data from the staging tensor to the + * device tensor. The mapping for staging tensors happens in the init function + * not in the record function. + */ + void record(std::shared_ptr commandBuffer) override; + + /** + * Does not perform any preEval commands. + */ + virtual void preEval() override; + + /** + * Performs a copy back into the main tensor to ensure that the data + * contained is the one that is now being stored in the GPU. + */ + virtual void postEval() override; + +}; + +} // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index b80cc1db0..35e97a475 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -1,9 +1,8 @@ #pragma once #include "kompute/Core.hpp" - +#include "kompute/operations/OpBase.hpp" #include "kompute/Tensor.hpp" - #include "kompute/operations/OpBase.hpp" namespace kp { @@ -14,8 +13,6 @@ namespace kp { class OpTensorSyncDevice : public OpBase { public: - OpTensorSyncDevice(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. * @@ -24,10 +21,7 @@ class OpTensorSyncDevice : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncDevice(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorSyncDevice(std::vector> tensors); /** * Default destructor. 
This class does not manage memory so it won't be expecting the parent to perform a release. @@ -37,12 +31,13 @@ class OpTensorSyncDevice : public OpBase /** * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. */ - void init() override; + void init(std::shared_ptr physicalDevice, + std::shared_ptr device) override; /** * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. */ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp index dd4549b00..eebdd7084 100644 --- a/src/include/kompute/operations/OpTensorSyncLocal.hpp +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -14,8 +14,6 @@ namespace kp { class OpTensorSyncLocal : public OpBase { public: - OpTensorSyncLocal(); - /** * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. * @@ -24,10 +22,7 @@ class OpTensorSyncLocal : public OpBase * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncLocal(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); + OpTensorSyncLocal(std::vector> tensors); /** * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. @@ -37,12 +32,13 @@ class OpTensorSyncLocal : public OpBase /** * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. 
*/ - void init() override; + void init(std::shared_ptr physicalDevice, + std::shared_ptr device) override; /** * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory. */ - void record() override; + void record(std::shared_ptr commandBuffer) override; /** * Does not perform any preEval commands. diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index d5d48b9d4..42efdff9b 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -54,7 +54,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) auto startSync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - mgr.evalOpDefault( + mgr.evalOpDefault( { inputsSyncB[i] }, kp::Shader::compile_source(shader)); } @@ -86,7 +86,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) auto startAsync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.evalOpAsync( + mgrAsync.evalOpAsync( { inputsAsyncB[i] }, "async" + std::to_string(i), kp::Shader::compile_source(shader)); @@ -153,10 +153,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) std::vector result = kp::Shader::compile_source(shader); - mgr.evalOpAsync( + mgr.evalOpAsync( { tensorA }, "asyncOne", kp::Shader::compile_source(shader)); - mgr.evalOpAsync( + mgr.evalOpAsync( { tensorB }, "asyncTwo", kp::Shader::compile_source(shader)); mgr.evalOpAwait("asyncOne"); diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 43f6ef028..940fdf722 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -27,7 +27,7 @@ TEST(TestDestroy, TestDestroyTensorSingle) sq = mgr.sequence(); sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -70,7 +70,7 @@ TEST(TestDestroy, TestDestroyTensorVector) sq = mgr.sequence(); sq->begin(); - sq->record( + sq->record( { tensorA, tensorB }, 
kp::Shader::compile_source(shader)); sq->end(); @@ -135,7 +135,7 @@ TEST(TestDestroy, TestDestroySequenceSingle) sq = mgr.sequence(); sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -175,14 +175,14 @@ TEST(TestDestroy, TestDestroySequenceVector) sq1 = mgr.sequence("One"); sq1->begin(); - sq1->record( + sq1->record( { tensorA }, kp::Shader::compile_source(shader)); sq1->end(); sq1->eval(); sq2 = mgr.sequence("Two"); sq2->begin(); - sq2->record( + sq2->record( { tensorA }, kp::Shader::compile_source(shader)); sq2->end(); sq2->eval(); @@ -216,11 +216,11 @@ TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager) { mgr.rebuild({ tensorA }); - mgr.evalOp( + mgr.evalOp( { tensorA }, "one", kp::Shader::compile_source(shader)); - mgr.evalOp( + mgr.evalOp( { tensorA }, "two", kp::Shader::compile_source(shader)); @@ -256,7 +256,7 @@ TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager) sq1 = mgr.sequence("One"); sq1->begin(); - sq1->record( + sq1->record( { tensorA }, kp::Shader::compile_source(shader)); sq1->end(); sq1->eval(); @@ -289,11 +289,11 @@ TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager) { mgr.rebuild({ tensorA }); - mgr.evalOp( + mgr.evalOp( { tensorA }, "one", kp::Shader::compile_source(shader)); - mgr.evalOp( + mgr.evalOp( { tensorA }, "two", kp::Shader::compile_source(shader)); @@ -323,11 +323,11 @@ TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager) { mgr.rebuild({ tensorA }); - mgr.evalOp( + mgr.evalOp( { tensorA }, "one", kp::Shader::compile_source(shader)); - mgr.evalOp( + mgr.evalOp( { tensorA }, "two", kp::Shader::compile_source(shader)); @@ -357,7 +357,7 @@ TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager) { mgr.rebuild({ tensorA }); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorA }, kp::Shader::compile_source(shader)); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index e0f0b0e62..00425ddc0 100644 --- 
a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -41,7 +41,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) sq->record({ wIn, bIn }); - sq->record( + sq->record( params, std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, @@ -120,7 +120,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) // Record op algo base sq->begin(); - sq->record( + sq->record( params, std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index d82091a84..a1503cc83 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -27,11 +27,11 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) { sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->record({ tensorA }); @@ -72,19 +72,19 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) // Then perform the computations sq->begin(); - sq->record({ tensorA }, + sq->record({ tensorA }, kp::Shader::compile_source(shader)); sq->end(); sq->eval(); sq->begin(); - sq->record({ tensorA }, + sq->record({ tensorA }, kp::Shader::compile_source(shader)); sq->end(); sq->eval(); sq->begin(); - sq->record({ tensorA }, + sq->record({ tensorA }, kp::Shader::compile_source(shader)); sq->end(); sq->eval(); @@ -121,7 +121,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -134,7 +134,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -147,7 +147,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) 
sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -205,7 +205,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); @@ -263,7 +263,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) } )"); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, kp::Shader::compile_source(shader)); @@ -306,7 +306,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, kp::Shader::compile_source(shader)); @@ -339,7 +339,7 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) sq = mgr.sequence(); sq->begin(); - sq->record( + sq->record( { tensorA }, kp::Shader::compile_source(shader)); sq->end(); diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 77a5fd8d4..83cbb6619 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -51,7 +51,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) sq->begin(); - sq->record( + sq->record( { tensorA, tensorB }, kp::Shader::compile_source(shader)); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 1fd121966..09908b722 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -5,7 +5,7 @@ #include "kompute_test/shaders/shadertest_op_custom_shader.hpp" -TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) +TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) { kp::Manager mgr; @@ -28,7 +28,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorA, tensorB }, kp::Shader::compile_source(shader)); mgr.evalOpDefault({ tensorA, tensorB 
}); @@ -37,7 +37,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); } -TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) +TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) { kp::Manager mgr; @@ -45,7 +45,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuild({ tensorA, tensorB }); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorA, tensorB }, std::vector( (uint32_t*)kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv, @@ -59,7 +59,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); } -TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) +TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile) { kp::Manager mgr; @@ -67,7 +67,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuild({ tensorA, tensorB }); - mgr.evalOpDefault( + mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); mgr.evalOpDefault({ tensorA, tensorB }); diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index 5a3066d07..e4075314f 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -33,7 +33,7 @@ TEST(TestSpecializationConstants, TestTwoConstants) auto spec = kp::Constants({5.0, 0.3}); sq->begin(); - sq->record( + sq->record( { tensorA, tensorB }, kp::Shader::compile_source(shader), kp::Workgroup(), spec); diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 1116fcbe5..015874546 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -23,7 +23,7 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) sq = mgr.sequence(); sq->begin(); - sq->record( + sq->record( { tensorA, tensorB }, std::vector( (uint32_t*)kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,