From 3304767f2cf0e1902fe190758e7a95c45d0bc6fd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Fri, 26 Feb 2021 18:58:19 +0000 Subject: [PATCH] Updated to enable for opmult to work --- single_include/kompute/Kompute.hpp | 4 +- src/Algorithm.cpp | 37 ++++++++----------- src/Manager.cpp | 4 +- src/OpAlgoDispatch.cpp | 6 ++- src/OpTensorSyncDevice.cpp | 2 + src/Sequence.cpp | 4 +- .../kompute/operations/OpAlgoDispatch.hpp | 2 +- src/include/kompute/operations/OpMult.hpp | 2 +- test/TestAsyncOperations.cpp | 9 +++-- test/TestDestroy.cpp | 6 ++- test/TestLogisticRegression.cpp | 12 ++++-- 11 files changed, 49 insertions(+), 39 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index e60a5bdf0..79bc6e1b4 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1686,7 +1686,7 @@ class OpAlgoDispatch : public OpBase { public: - OpAlgoDispatch(const std::shared_ptr& algorithm); + OpAlgoDispatch(const std::shared_ptr& algorithm, bool skipAlgoInit = false); /** * Default destructor, which is in charge of destroying the algorithm @@ -1745,7 +1745,7 @@ class OpMult : public OpAlgoDispatch * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::vector> tensors, std::shared_ptr algorithm) - : OpAlgoDispatch(algorithm) + : OpAlgoDispatch(algorithm, true) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index aee9ddd36..f00bc1090 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -15,7 +15,14 @@ Algorithm::Algorithm( KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; - this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants); + + if (tensors.size() && spirv.size()) { + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and spirv size: {}", tensors.size(), spirv.size()); + this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants); + } + else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or spirv so not rebuilding vulkan components"); + } } Algorithm::~Algorithm() @@ -39,10 +46,10 @@ Algorithm::rebuild( this->mSpirv = spirv; this->mSpecializationConstants = specializationConstants; this->mPushConstants = pushConstants; - this->setWorkgroup(workgroup); + this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1); // Descriptor pool is created first so if available then destroy all before rebuild - if (this->mFreeDescriptorPool) { + if (this->isInit()) { this->destroy(); } @@ -340,20 +347,6 @@ Algorithm::recordDispatch(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm calling record dispatch"); - if(this->mPipelineCache) { - KP_LOG_WARN("Value valid"); - } - else { - KP_LOG_WARN("NOT Value valid"); - } - - if(this->mPipeline) { - KP_LOG_WARN("Value valid"); - } - else { - KP_LOG_WARN("NOT Value valid"); - } - commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline); @@ -376,10 +369,7 @@ Algorithm::recordDispatch(std::shared_ptr commandBuffer) void Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { - KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size X: {}, Y: {}, Z: {}", - this->mWorkgroup[0], - this->mWorkgroup[1], - this->mWorkgroup[2]); + KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size"); // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors @@ -394,6 +384,11 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { } else { this->mWorkgroup = { minSize, 1, 1 }; } + + KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}", + this->mWorkgroup[0], + this->mWorkgroup[1], + this->mWorkgroup[2]); } const Workgroup& diff --git a/src/Manager.cpp b/src/Manager.cpp index 833069d9f..9f25e1826 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -33,7 +33,7 @@ Manager::Manager() Manager::Manager(uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { - this->mManageResources = false; + this->mManageResources = true; this->createInstance(); this->createDevice(familyQueueIndices, physicalDeviceIndex); @@ -43,7 +43,7 @@ Manager::Manager(std::shared_ptr instance, std::shared_ptr physicalDevice, std::shared_ptr device) { - this->mManageResources = true; + this->mManageResources = false; this->mInstance = instance; this->mPhysicalDevice = physicalDevice; diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index a20900189..09050fba0 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -4,10 +4,14 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm) +OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm, bool skipAlgoCheck) { KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); + if (!skipAlgoCheck && !algorithm->isInit()) { + throw std::runtime_error("Kompute OpAlgoDispatch constructor with non initialised algorithm"); + } + this->mAlgorithm = algorithm; } diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 2cdd4e443..a73224297 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -19,6 +19,8 @@ OpTensorSyncDevice::OpTensorSyncDevice( OpTensorSyncDevice::~OpTensorSyncDevice() { KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started"); + + this->mTensors.clear(); } void diff --git a/src/Sequence.cpp b/src/Sequence.cpp index e341e734c..9424e119d 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -78,8 +78,7 @@ Sequence::eval() std::shared_ptr Sequence::eval(std::shared_ptr op) { this->clear(); - this->record(op); - this->eval(); + return this->record(op)->eval(); } std::shared_ptr @@ -88,6 +87,7 @@ Sequence::evalAsync() if (this->isRecording()) { this->end(); } + if (this->mIsRunning) { throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was " "called without successful wait"); diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 1b5ab1bf0..e61c3166d 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -17,7 +17,7 @@ class OpAlgoDispatch : public OpBase { public: - OpAlgoDispatch(const std::shared_ptr& algorithm); + OpAlgoDispatch(const std::shared_ptr& algorithm, bool skipAlgoInit = false); /** * Default destructor, which is in charge of destroying the algorithm diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 992b0e8a0..fea38bdee 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -33,7 +33,7 @@ class OpMult : public OpAlgoDispatch * @param komputeWorkgroup Optional parameter to specify the layout for processing */ OpMult(std::vector> tensors, std::shared_ptr algorithm) - : OpAlgoDispatch(algorithm) + : OpAlgoDispatch(algorithm, true) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 6e5ba8adf..da9f8d887 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -80,8 +80,11 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) std::vector> inputsAsyncB; + std::vector> algosAsync; + for (uint32_t i = 0; i < numParallel; i++) { inputsAsyncB.push_back(mgr.tensor(data)); + algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv)); } std::vector> sqs; @@ -93,7 +96,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) auto startAsync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - sqs[i]->evalAsync(algorithms[i]); + sqs[i]->evalAsync(algosAsync[i]); } for (uint32_t i = 0; i < numParallel; i++) { @@ -157,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) sq1->eval({ tensorA, tensorB }); - std::shared_ptr algo1 = mgr.algorithm({tensorA}); - std::shared_ptr algo2 = mgr.algorithm({tensorB}); + std::shared_ptr algo1 = mgr.algorithm({tensorA}, spirv); + std::shared_ptr algo2 = mgr.algorithm({tensorB}, spirv); sq1->evalAsync(algo1); sq2->evalAsync(algo2); diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 43b7c8e1a..cf753267e 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -74,7 +74,7 @@ TEST(TestDestroy, TestDestroyTensorVector) mgr.sequence() ->record(algo->getTensors()) ->record(algo) - ->record(algo->getTensors()) + ->record(algo->getTensors()) ->eval(); tensorA->destroy(); @@ -111,12 +111,14 @@ TEST(TestDestroy, TestDestroySequenceSingle) tensorA = mgr.tensor({0, 0, 0}); - mgr.sequence() + sq = mgr.sequence() ->record({tensorA}) ->record(mgr.algorithm({tensorA}, spirv)) ->record({tensorA}) ->eval(); + sq->destroy(); + EXPECT_FALSE(sq->isInit()); } } diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 16c08afb9..f7ad9eda1 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,13 +32,15 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) wIn, wOutI, wOutJ, bIn, bOut, lOut }; + mgr.sequence()->eval(params); + std::vector spirv = std::vector( - (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, - (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + - kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); + (uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv, + (uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv + + kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len)); std::shared_ptr algorithm = - mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0})); + mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0})); std::shared_ptr sq = mgr.sequence() @@ -104,6 +106,8 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) wIn, wOutI, wOutJ, bIn, bOut, lOut }; + mgr.sequence()->record(params)->eval(); + std::vector spirv = std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +