diff --git a/.ccls b/.ccls index f215ea9d1..ab6261583 100644 --- a/.ccls +++ b/.ccls @@ -13,6 +13,7 @@ -DDEBUG=1 -DKOMPUTE_INCLUDE_FOR_SYNTAX +-I/usr/include/python3.6/ -I./python/pybind11/include/ -I./external/Vulkan-Headers/include/ -I./external/googletest/googletest/include/ diff --git a/Makefile b/Makefile index 2f0b5e2f5..da1df2cbd 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests ./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS) +#### PYTHON #### + +test_python: + python -m pytest -s --log-cli-level=DEBUG -v python/test/ + ####### Run CI Commands ####### # This command uses act to replicate github action diff --git a/README.md b/README.md index 4b199fa6d..3b96a4c6e 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ int main() { kp::Manager mgr; // 2. Create and initialise Kompute Tensors through manager - auto tensorInA = mgr.buildTensor({ 2., 2., 2. }); - auto tensorInB = mgr.buildTensor({ 1., 2., 3. }); - auto tensorOut = mgr.buildTensor({ 0., 0., 0. }); + auto tensorInA = mgr.tensor({ 2., 2., 2. }); + auto tensorInB = mgr.tensor({ 1., 2., 3. }); + auto tensorOut = mgr.tensor({ 0., 0., 0. }); // 3. Specify "multiply shader" code (can also be raw string, spir-v bytes or file path) std::string shaderString = (R"( diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 5823c6df1..665c4f0cf 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU. mgr.evalOpDefault({tensorLHS, tensorRHS, tensorOutput}); // Create a new sequence - std::weak_ptr sqWeakPtr = mgr.getOrCreateManagedSequence(); + std::weak_ptr sqWeakPtr = mgr.sequence(); if (std::shared_ptr sq = sqWeakPtr.lock()) { @@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_. 
// We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); // Creates tensor an initializes GPU memory (below we show more granularity) auto tensorA = std::make_shared(kp::Tensor(std::vector(10, 0.0))); @@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce kp::Manager mgr; if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("createTensors").lock()) + mgr.sequence("createTensors").lock()) { // ... diff --git a/docs/overview/async-parallel.rst b/docs/overview/async-parallel.rst index 88df7ac6f..8c983bc7d 100644 --- a/docs/overview/async-parallel.rst +++ b/docs/overview/async-parallel.rst @@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue // We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); We create the tensors without modifications. diff --git a/docs/overview/reference.rst b/docs/overview/reference.rst index 65721fb8d..8b6160ebb 100644 --- a/docs/overview/reference.rst +++ b/docs/overview/reference.rst @@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class. .. doxygenclass:: kp::OpMult :members: -OpTensorCreate -------- - -The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU. - -.. 
image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg - :width: 100% - -.. doxygenclass:: kp::OpTensorCreate - :members: OpTensorCopy ------- diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index e22f2aa00..0337fa63e 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -42,16 +42,9 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, kp::Manager mgr; { + mgr.rebuild(params); - std::shared_ptr sqTensor = - mgr.createManagedSequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 14b58cba9..161bb8bd4 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -14,9 +14,9 @@ int main() kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); + auto tensorInB = mgr.tensor({ 0.0, 1.0, 2.0 }); + auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING std::string shader(R"( diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 3f17f0888..304416a0f 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) { // Set 
the new data in the local device this->mSecondaryTensor->setData({value}); // Execute recorded sequence - if (std::shared_ptr sq = this->mSequence.lock()) { + if (std::shared_ptr sq = this->mSequence) { sq->eval(); } else { @@ -29,12 +29,12 @@ float KomputeSummatorNode::get_total() const { void KomputeSummatorNode::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence - if (std::shared_ptr sq = this->mSequence.lock()) + if (std::shared_ptr sq = this->mSequence) { std::string shader(R"( @@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record>( + sq->record( { this->mPrimaryTensor, this->mSecondaryTensor }, std::vector(shader.begin(), shader.end())); diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index 788486e82..26a38181e 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -28,9 +28,9 @@ float KomputeSummator::get_total() const { void KomputeSummator::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // 
We now record the steps in the sequence { diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index f583d910f..010a3164d 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -50,15 +50,10 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { { kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.rebuild(params); - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + { + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index 4135e83ed..e9a9c51b5 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -55,15 +55,9 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; { - std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.rebuild(params); - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index d3b8b3557..14664a563 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -35,15 +35,9 @@ int main() kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.rebuild(params); - 
sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 6b3a1dc78..79b864f8b 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -119,7 +119,7 @@ integrate with the vulkan kompute use. @param device Vulkan logical device to use for all base resources @param physicalDeviceIndex Index for vulkan physical device used)doc"; -static const char *__doc_kp_Manager_buildTensor = +static const char *__doc_kp_Manager_tensor = R"doc(Function that simplifies the common workflow of tensor creation and initialization. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor and then create it using @@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc"; static const char *__doc_kp_Manager_createInstance = R"doc()doc"; -static const char *__doc_kp_Manager_createManagedSequence = -R"doc(Create a new managed Kompute sequence so it's available within the -manager. - -@param sequenceName The name for the named sequence to be created, if -empty then default indexed value is used @param queueIndex The queue -to use from the available queues @return Weak pointer to the manager -owned sequence resource)doc"; - static const char *__doc_kp_Manager_evalOp = R"doc(Function that evaluates operation against named sequence. @@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence. TArgs Template parameters that will be used to initialise Operation to allow for extensible configurations on initialisation)doc"; -static const char *__doc_kp_Manager_getOrCreateManagedSequence = +static const char *__doc_kp_Manager_sequence = R"doc(Get or create a managed Sequence that will be contained by this manager. 
If the named sequence does not currently exist, it would be created and initialised. diff --git a/python/src/main.cpp b/python/src/main.cpp index 6e795fad5..889084c7e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) { .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized") // record - .def("record_tensor_create", &kp::Sequence::record, - "Records operation to create and initialise tensor GPU memory and buffer") .def("record_tensor_copy", &kp::Sequence::record, "Records operation to copy one tensor to one or many tensors") .def("record_tensor_sync_device", &kp::Sequence::record, @@ -157,11 +155,16 @@ PYBIND11_MODULE(kp, m) { [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.") - .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name") - .def("create_sequence", &kp::Manager::createManagedSequence, - py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") - .def("build_tensor", &kp::Manager::buildTensor, - py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, + .def("sequence", &kp::Manager::sequence, + py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues") + .def("tensor", &kp::Manager::tensor, + py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true, + "Build and initialise tensor") + .def("rebuild", py::overload_cast>, bool>(&kp::Manager::rebuild), + py::arg("tensors"), py::arg("syncDataToGPU") = true, + "Build and initialise list of tensors") + .def("rebuild", py::overload_cast, 
bool>(&kp::Manager::rebuild), + py::arg("tensor"), py::arg("syncDataToGPU") = true, "Build and initialise tensor") // Await functions @@ -172,8 +175,6 @@ PYBIND11_MODULE(kp, m) { py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created") // eval default - .def("eval_tensor_create_def", &kp::Manager::evalOpDefault, - "Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence") .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault, "Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence") .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault, @@ -209,8 +210,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") // eval - .def("eval_tensor_create", &kp::Manager::evalOp, - "Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_tensor_copy", &kp::Manager::evalOp, "Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_tensor_sync_device", &kp::Manager::evalOp, @@ -249,8 +248,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") // eval async default - .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence") .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence") .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault, @@ -286,8 +283,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") // eval async - 
.def("eval_async_tensor_create", &kp::Manager::evalOpAsync, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_async_tensor_copy", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync, diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index 337c7a5db..bcad405a6 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -14,7 +14,7 @@ def test_array_multiplication(): tensor_out = kp.Tensor([0, 0, 0]) # 3. Initialise the Kompute Tensors in the GPU - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) # 4. Define the multiplication shader code to run on the GPU @ps.python2shader diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 9dee9df93..7050b9c22 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -2,6 +2,7 @@ import os import kp import numpy as np +import logging DIRNAME = os.path.dirname(os.path.abspath(__file__)) @@ -16,7 +17,7 @@ def test_opmult(): mgr = kp.Manager() - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) @@ -41,7 +42,7 @@ def test_opalgobase_data(): layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input tensor binding index is relative to its index in the parameters passed layout(set = 0, binding = 0) buffer bina { float tina[]; }; layout(set = 0, binding = 1) buffer binb { float tinb[]; }; layout(set = 0, binding = 2) buffer bout { float tout[]; }; @@ -52,7 +53,7 @@ def test_opalgobase_data(): } """ - 
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData) @@ -75,7 +76,7 @@ def test_opalgobase_file(): shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -93,14 +94,14 @@ def test_sequence(): tensor_in_b = kp.Tensor([1, 2, 3]) tensor_out = kp.Tensor([0, 0, 0]) - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_await_def() - seq = mgr.create_sequence("op") + seq = mgr.sequence("op") seq.begin() seq.record_tensor_sync_local([tensor_in_a]) seq.record_tensor_sync_local([tensor_in_b]) @@ -118,32 +119,35 @@ def test_workgroup(): tensor_a = kp.Tensor(np.zeros([16,8])) tensor_b = kp.Tensor(np.zeros([16,8])) - mgr.eval_tensor_create_def([tensor_a, tensor_b]) + + mgr.rebuild([tensor_a, tensor_b]) shader_src = """ #version 450 layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input tensor binding index is relative to its index in the parameters passed layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; }; layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; }; void main() { uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y; - + toutx[index] = gl_GlobalInvocationID.x; touty[index] = gl_GlobalInvocationID.y; } """ shader_src = bytes(shader_src, encoding='utf8') - seq = mgr.create_sequence() + seq = mgr.sequence("new") seq.begin() seq.record_algo_data([tensor_a, tensor_b], shader_src, 
(16,8,1)) seq.end() seq.eval() - + mgr.eval_tensor_sync_local_def([tensor_a, tensor_b]) + assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel()) assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel()) + diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index f87375887..6783bbc87 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -66,10 +66,10 @@ def test_logistic_regression(): params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] - mgr.eval_tensor_create_def(params) + mgr.rebuild(params) # Create a managed sequence - sq = mgr.create_sequence() + sq = mgr.sequence() # Clear previous operations and begin recording for new operations sq.begin() diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index 9ce53e85c..57ab728cc 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -8,7 +8,6 @@ #include "kompute/operations/OpAlgoBase.hpp" #include "kompute/operations/OpAlgoLhsRhsOut.hpp" #include "kompute/operations/OpMult.hpp" -#include "kompute/operations/OpTensorCreate.hpp" #include "kompute/operations/OpTensorCopy.hpp" #include "kompute/operations/OpTensorSyncDevice.hpp" #include "kompute/operations/OpTensorSyncLocal.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index d388fa24b..772397a26 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -697,6 +697,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920; } #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP +#include #include #define KP_MAX_DIM_SIZE 1 @@ -723,7 +724,7 @@ class Tensor enum class TensorTypes { eDevice = 0, ///< Type is device memory, source and destination - eHost = 1, ///< Type is host 
memory, source and destination + eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; @@ -736,7 +737,8 @@ class Tensor * Default constructor with data provided which would be used to create the * respective vulkan buffer and memory. * - * @param data Non-zero-sized vector of data that will be used by the tensor + * @param data Non-zero-sized vector of data that will be used by the + * tensor * @param tensorType Type for the tensor which is of type TensorTypes */ Tensor(const std::vector& data, @@ -829,24 +831,30 @@ class Tensor bool createBarrier); /** - * Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. + * Records a copy from the internal staging memory to the device memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromStagingToDevice(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromStagingToDevice( + std::shared_ptr commandBuffer, + bool createBarrier); /** - * Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. + * Records a copy from the internal device memory to the staging memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. 
Default is true. */ - void recordCopyFromDeviceToStaging(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromDeviceToStaging( + std::shared_ptr commandBuffer, + bool createBarrier); /** * Records the buffer memory barrier into the command buffer which @@ -908,9 +916,17 @@ class Tensor bool mIsInit = false; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer - void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); - void allocateBindMemory(std::shared_ptr buffer, std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); - void copyBuffer(std::shared_ptr commandBuffer, std::shared_ptr bufferFrom, std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier); + void createBuffer(std::shared_ptr buffer, + vk::BufferUsageFlags bufferUsageFlags); + void allocateBindMemory(std::shared_ptr buffer, + std::shared_ptr memory, + vk::MemoryPropertyFlags memoryPropertyFlags); + void copyBuffer(std::shared_ptr commandBuffer, + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); @@ -949,13 +965,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -963,14 +977,12 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for OpBase class. 
This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { @@ -1234,50 +1246,38 @@ class Sequence namespace kp { /** - Operation that creates tensor and manages the memory of the components - created + Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. */ -class OpTensorCreate : public OpBase +class OpTensorSyncDevice : public OpBase { public: - OpTensorCreate(); + OpTensorSyncDevice(); /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. * * @param physicalDevice Vulkan physical device used to find device queues * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. 
- * @param freeTensors Whether operation manages the memory of the Tensors */ - OpTensorCreate(std::shared_ptr physicalDevice, + OpTensorSyncDevice(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors); /** - * Default destructor which in this case expects the parent class to free - * the tensors + * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ - ~OpTensorCreate() override; + ~OpTensorSyncDevice() override; /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. + * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. */ void init() override; /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. + * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. */ void record() override; @@ -1287,8 +1287,7 @@ class OpTensorCreate : public OpBase virtual void preEval() override; /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. + * Does not perform any postEval commands. 
*/ virtual void postEval() override; @@ -1352,23 +1351,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created + * @param queueIndex The queue to use from the available queues * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used - * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource - */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. @@ -1385,7 +1373,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -1415,10 +1403,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1437,7 +1423,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -1468,10 +1454,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - 
this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1512,36 +1496,98 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will use them to create a new Tensor and then initialise it. The + * tensor memory will then be managed and owned by the manager. * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + + if (syncDataToGPU) { + this->evalOpDefault({ tensor }); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor initialisation. 
It + * will take the constructor parameters for a Tensor and will use it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. + * + * @param tensors Array of tensors to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::vector> tensors, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); + for (std::shared_ptr tensor : tensors) { + + // False syncData to run all tensors at once instead of one by one + this->rebuild(tensor, false); + } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } + + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will use it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. + * + * @param tensor Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + + if (syncDataToGPU) { + this->evalOpDefault({ tensor }); + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -1552,6 +1598,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; @@ -1999,59 +2047,6 @@ class OpTensorCopy : public OpBase namespace kp { -/** - Operation that syncs tensor's device
by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. -*/ -class OpTensorSyncDevice : public OpBase -{ - public: - OpTensorSyncDevice(); - - /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - */ - OpTensorSyncDevice(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. - */ - ~OpTensorSyncDevice() override; - - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init() override; - - /** - * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Does not perform any postEval commands. - */ - virtual void postEval() override; - - private: -}; - -} // End namespace kp - -namespace kp { - /** Operation that syncs tensor's local memory by mapping device data into the local CPU memory. 
For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. */ diff --git a/src/Manager.cpp b/src/Manager.cpp old mode 100755 new mode 100644 index df9d64db6..e7bb88f2b --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -68,6 +68,18 @@ Manager::~Manager() this->mManagedSequences.clear(); } + if (this->mManagedTensors.size()) { + SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); + for (const std::shared_ptr& tensor : this->mManagedTensors) { + if (!tensor->isInit()) { + SPDLOG_ERROR("Kompute Manager attempted to free managed tensor " + "but not tensor is not initialised"); + } + tensor->freeMemoryDestroyGPUResources(); + } + this->mManagedTensors.clear(); + } + if (this->mFreeDevice) { SPDLOG_INFO("Destroying device"); this->mDevice->destroy( @@ -99,48 +111,34 @@ Manager::~Manager() } std::shared_ptr -Manager::getOrCreateManagedSequence(std::string sequenceName) +Manager::sequence(std::string sequenceName, uint32_t queueIndex) { - SPDLOG_DEBUG("Kompute Manager creating Sequence object"); + SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} " + "and queueIndex: {}", + sequenceName, + queueIndex); + + std::shared_ptr sq = nullptr; std::unordered_map>::iterator found = this->mManagedSequences.find(sequenceName); if (found == this->mManagedSequences.end()) { - return this->createManagedSequence(sequenceName); + std::shared_ptr sq = + std::make_shared(this->mPhysicalDevice, + this->mDevice, + this->mComputeQueues[queueIndex], + this->mComputeQueueFamilyIndices[queueIndex]); + sq->init(); + + this->mManagedSequences.insert({ sequenceName, sq }); + + return sq; } else { return found->second; } } -std::shared_ptr 
-Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) -{ - - SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} " - "and queueIndex: {}", - sequenceName, - queueIndex); - - std::shared_ptr sq = - std::make_shared(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]); - sq->init(); - - if (sequenceName.empty()) { - this->mCurrentSequenceIndex++; - this->mManagedSequences.insert( - { KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex), - sq }); - } else { - // TODO: Check if sequence doesn't already exist - this->mManagedSequences.insert({ sequenceName, sq }); - } - return sq; -} - void Manager::createInstance() { diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp index c6ecf316d..ad4bbc17b 100644 --- a/src/OpAlgoBase.cpp +++ b/src/OpAlgoBase.cpp @@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr commandBuffer, std::vector>& tensors, KomputeWorkgroup komputeWorkgroup) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 3df23aa57..3726c71ec 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params"); } diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp deleted file mode 100644 index 7918415e9..000000000 --- a/src/OpTensorCreate.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include "kompute/Tensor.hpp" - -#include 
"kompute/operations/OpTensorCreate.hpp" - -namespace kp { - -OpTensorCreate::OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor base"); -} - -OpTensorCreate::OpTensorCreate( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, true) -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params"); -} - -OpTensorCreate::~OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate destructor started"); -} - -void -OpTensorCreate::init() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpTensorCreate called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (tensor->isInit()) { - throw std::runtime_error( - "Kompute OpTensorCreate: Tensor has already been initialized"); - } - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->init(this->mPhysicalDevice, this->mDevice); - - tensor->mapDataIntoHostMemory(); - } - } -} - -void -OpTensorCreate::record() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate record called"); - - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { - this->mTensors[i]->recordCopyFromStagingToDevice( - this->mCommandBuffer, false); - } - } -} - -void -OpTensorCreate::preEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate preEval called"); -} - -void -OpTensorCreate::postEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate postEval called"); -} - -} diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 340786eb5..92bd7512f 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, 
false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 09d966e12..c7a4fb638 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); } diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 50ef8b0f1..3c3b7b10a 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->mDevice = device; this->mComputeQueue = computeQueue; this->mQueueIndex = queueIndex; - this->mIsInit = true; + this->mIsInit = false; } Sequence::~Sequence() @@ -203,6 +203,8 @@ Sequence::isInit() void Sequence::freeMemoryDestroyGPUResources() { + SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called"); + if (!this->mIsInit) { SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called " "but Sequence is not initialized so there's no relevant " diff --git a/src/Tensor.cpp b/src/Tensor.cpp index f04165cf9..a1ba15441 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -229,8 +229,12 @@ Tensor::mapDataFromHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); @@ -252,8 +256,12 @@ Tensor::mapDataIntoHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } else if 
(this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 8c689ba57..5ef32ff65 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,12 +1,13 @@ #pragma once +#include #include #include "kompute/Core.hpp" #include "kompute/Sequence.hpp" -#include "kompute/operations/OpTensorCreate.hpp" +#include "kompute/operations/OpTensorSyncDevice.hpp" #define KP_DEFAULT_SESSION "DEFAULT" @@ -63,23 +64,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created + * @param queueIndex The queue to use from the available queues * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used - * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource - */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -96,7 +86,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -126,10 +116,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -148,7 +136,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -179,10 +167,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -223,36 +209,98 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will use it to create a new Tensor and then create it. The + * tensor memory will then be managed and owned by the manager.
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + + if (syncDataToGPU) { + this->evalOpDefault({ tensor }); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will use it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. + * + * @param tensors Array of tensors to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::vector> tensors, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); + for (std::shared_ptr tensor : tensors) { + + // False syncData to run all tensors at once instead of one by one + this->rebuild(tensor, false); + } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } + + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will use it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager.
+ * + * @param tensor Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + + if (syncDataToGPU) { + this->evalOpDefault({ tensor }); + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -263,6 +311,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 6e35df994..a423abc20 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -31,13 +31,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -45,14 +43,12 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for OpBase class.
This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp deleted file mode 100644 index 4b8c784cc..000000000 --- a/src/include/kompute/operations/OpTensorCreate.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "kompute/Core.hpp" - -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpBase.hpp" - -namespace kp { - -/** - Operation that creates tensor and manages the memory of the components - created -*/ -class OpTensorCreate : public OpBase -{ - public: - OpTensorCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - * @param freeTensors Whether operation manages the memory of the Tensors - */ - OpTensorCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor which in this case expects the parent class to free - * the tensors - */ - ~OpTensorCreate() override; - - /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. 
- */ - void init() override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - - - private: -}; - -} // End namespace kp diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 43bccf99b..ddbcb6597 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgr.evalOpDefault(inputsSyncB); + mgr.rebuild(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); @@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgrAsync.evalOpDefault(inputsAsyncB); + mgrAsync.rebuild(inputsAsyncB); for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.createManagedSequence("async" + std::to_string(i), i); + mgrAsync.sequence("async" + std::to_string(i), i); } auto startAsync = std::chrono::high_resolution_clock::now(); @@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) std::shared_ptr tensorA{ new kp::Tensor(data) }; std::shared_ptr tensorB{ new kp::Tensor(data) }; - mgr.createManagedSequence("asyncOne"); - mgr.createManagedSequence("asyncTwo"); + mgr.sequence("asyncOne"); + mgr.sequence("asyncTwo"); - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); 
diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 7c3f15387..b974655a9 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,14 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); + mgr.rebuild(params); - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); @@ -122,14 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); + mgr.rebuild(params); - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); - - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 198e617af..754941561 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow) kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; - mgr.evalOpDefault({ tensorLHS }); + mgr.rebuild({ tensorLHS }); std::shared_ptr tensorRHS{ new kp::Tensor({ 2, 4, 6 }) }; - mgr.evalOpDefault({ tensorRHS }); + mgr.rebuild({ tensorRHS }); std::shared_ptr tensorOutput{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorOutput }); + mgr.rebuild({ tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -36,15 +36,13 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; { + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); + std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); - sq->record({ tensorLHS }); - sq->record({ tensorRHS }); - sq->record({ tensorOutput 
}); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -61,16 +59,16 @@ TEST(TestManager, TestMultipleSequences) kp::Manager mgr; std::shared_ptr sqOne = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwo = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); std::shared_ptr sqOneRef = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwoRef = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); EXPECT_EQ(sqOne, sqOneRef); EXPECT_NE(sqTwo, sqOneRef); @@ -90,17 +88,17 @@ TEST(TestManager, TestMultipleTensorsAtOnce) kp::Manager mgr; std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { - sq->begin(); - - sq->record({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); EXPECT_TRUE(tensorLHS->isInit()); EXPECT_TRUE(tensorRHS->isInit()); EXPECT_TRUE(tensorOutput->isInit()); + sq->begin(); + sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -116,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); - std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); + std::shared_ptr tensorA = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); mgr.evalOpDefault({ tensorA, tensorB }); @@ -126,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor) EXPECT_EQ(tensorB->data(), std::vector({ 0, 1, 2 })); std::shared_ptr tensorC = - mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); + mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); mgr.evalOpDefault({ tensorA, tensorC }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 11e94caa4..63f9778b0 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,14 +19,14 @@ 
TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); + mgr.rebuild({ tensorA }); + std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { sq->begin(); - sq->record({ tensorA }); - sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->record( @@ -58,13 +58,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - std::shared_ptr sqTensor = mgr.createManagedSequence(); + mgr.rebuild({ tensorA }, false); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sqTensor = mgr.sequence(); + + std::shared_ptr sq = mgr.sequence(); // First create the tensor in a separate sequence sqTensor->begin(); - sqTensor->record({ tensorA }); + sqTensor->record({ tensorA }); sqTensor->end(); sqTensor->eval(); @@ -111,24 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - { - std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); - - sq->begin(); - - sq->record({ tensorA }); - - sq->record( - { tensorA }, std::vector(shader.begin(), shader.end())); - - sq->end(); - sq->eval(); - } + mgr.rebuild({ tensorA }); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence"); sq->begin(); @@ -141,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence2"); sq->begin(); @@ -154,7 +143,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence5"); + mgr.sequence("newSequence3"); + + sq->begin(); + + sq->record( + { tensorA }, std::vector(shader.begin(), shader.end())); + + sq->end(); + sq->eval(); + } + + { + std::shared_ptr sq = + mgr.sequence("newSequence5"); sq->begin(); @@ -183,13 +185,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = 
pa[index] + 1; })"); + mgr.rebuild({ tensorA }, false); + { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->end(); sq->eval(); @@ -197,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence2"); sq->begin(); @@ -213,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence3"); sq->begin(); @@ -238,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) std::shared_ptr tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) }; std::shared_ptr tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) }; - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.rebuild({ tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -273,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor( + { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInB = mgr.tensor( + { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorOut = mgr.tensor( + { 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( // The version to use @@ -296,6 +303,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); @@ -304,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) EXPECT_EQ(tensorOut->data(), 
std::vector({ 0.0, 4.0, 12.0 })); } + +TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) +{ + std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; + + std::string shader(R"( + #version 450 + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = pa[index] + 1; + })"); + + { + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + mgr.rebuild({ tensorA }); + + sq = mgr.sequence(); + + sq->begin(); + sq->record( + { tensorA }, std::vector(shader.begin(), shader.end())); + sq->end(); + + sq->eval(); + + mgr.evalOpDefault({ tensorA }); + } + } + EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); +} diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index bd7727790..c2431bc4d 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } )"); + mgr.rebuild({ tensorA, tensorB }, false); + { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("default"); + mgr.sequence("default"); sq->begin(); - sq->record({ tensorA, tensorB }); + sq->record({ tensorA, tensorB }); sq->end(); @@ -45,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("run"); + mgr.sequence("run"); sq->begin(); @@ -63,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("export"); + mgr.sequence("export"); sq->begin(); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 273421b26..ae2bfce15 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; 
std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); std::string shader(R"( #version 450 @@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, @@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 0e840cad6..3f2bc9500 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -8,13 +8,13 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 1, 2, 3 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 2, 3, 4 }; std::vector testVecB{ 0, 0, 0 }; std::vector testVecC{ 0, 0, 0 
}; @@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; std::shared_ptr tensorC{ new kp::Tensor(testVecC) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuild({ tensorA, tensorB, tensorC }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -63,14 +63,17 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 3, 4, 5 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }, false); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorA }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -89,14 +92,20 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }, false); + + // Manually copy data into host memory of Tensor + tensorA->mapDataIntoHostMemory(); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -115,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( @@ -123,7 +132,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, 
tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -142,12 +151,12 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 6, 7, 8 }; std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA }); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index f0ba87433..ca3473576 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -5,20 +5,19 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { - - kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; - std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; - mgr.evalOpDefault({ tensorA }); + { + kp::Manager mgr; - EXPECT_TRUE(tensorA->isInit()); + mgr.rebuild({ tensorA }); - EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_TRUE(tensorA->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + } - tensorA->freeMemoryDestroyGPUResources(); EXPECT_FALSE(tensorA->isInit()); } @@ -33,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -53,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -63,7 +62,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) EXPECT_EQ(tensorB->data(), testVecB); } -TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) +TEST(TestOpTensorCreate, 
TestTensorMemoryManagedByManagerDestroyed) { std::vector testVecA{ 9, 8, 7 }; @@ -74,8 +73,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) { kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -88,6 +87,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) EXPECT_FALSE(tensorB->isInit()); } +TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed) +{ + + std::vector testVecA{ 9, 8, 7 }; + std::vector testVecB{ 6, 5, 4 }; + + std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; + std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; + + kp::Manager mgr; + + { + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_EQ(tensorB->data(), testVecB); + } + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); +} + TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) { @@ -99,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -123,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - mgr.evalOpDefault({ tensorA }); + mgr.rebuild({ tensorA }); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 72f53ac54..f992805f5 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.evalOpDefault({ 
tensorA }); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuild({ tensorA, tensorB, tensorC }, false); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 882729dcf..0dec484b5 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); EXPECT_TRUE(sq->eval()); EXPECT_TRUE(!sq->isRecording()); @@ -32,10 +32,11 @@ TEST(TestSequence, SequenceDestructorViaManager) { kp::Manager mgr; - sq = mgr.getOrCreateManagedSequence("newSequence"); + sq = mgr.sequence("newSequence"); EXPECT_TRUE(sq->isInit()); } EXPECT_FALSE(sq->isInit()); } + diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 5e54e8585..705c825f7 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -23,11 +23,11 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; - if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("new")) { - sq->begin(); + mgr.rebuild({ tensorA, tensorB }); - sq->record({ tensorA, tensorB }); + if (std::shared_ptr sq = + mgr.sequence("new")) { + sq->begin(); sq->record({ tensorA, tensorB });