diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index a9860425c..9b032bb4f 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1358,7 +1358,7 @@ class Manager std::unordered_map>::iterator found = this->mManagedSequences.find(sequenceName); - if (found == this->mManagedSequences.end()) { + if (found != this->mManagedSequences.end()) { std::shared_ptr sq = found->second; SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index e2709836c..340fd782d 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -130,7 +130,7 @@ class Manager std::unordered_map>::iterator found = this->mManagedSequences.find(sequenceName); - if (found == this->mManagedSequences.end()) { + if (found != this->mManagedSequences.end()) { std::shared_ptr sq = found->second; SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 15fd9caef..1c0f74e59 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -9,6 +9,8 @@ TEST(TestAsyncOperations, TestManagerAsync) { uint32_t size = 100000; + uint32_t numParallel = 6; + std::string shader(R"( #version 450 @@ -20,91 +22,90 @@ TEST(TestAsyncOperations, TestManagerAsync) void main() { uint index = gl_GlobalInvocationID.x; - for (int i = 0; i < 100000; i++) + for (int i = 0; i < 10000; i++) { pa[index] += 1.0; } pb[index] = pa[index]; + pa[index] = 0; } )"); std::vector data(size, 0.0); - std::vector resultSync(size, 100000); - std::vector resultAsync(size, 100000); - - std::shared_ptr tensorSyncA{ new kp::Tensor(data) }; - std::shared_ptr tensorSyncB{ new kp::Tensor(data) }; - std::shared_ptr tensorSyncC{ new kp::Tensor(data) }; - std::shared_ptr tensorSyncD{ new kp::Tensor(data) }; - std::shared_ptr tensorSyncE{ new kp::Tensor(data) }; - std::shared_ptr tensorSyncF{ new kp::Tensor(data) }; + std::vector resultSync(size, 10000); + std::vector resultAsync(size, 10000); kp::Manager mgr; - mgr.evalOpDefault({ tensorSyncA, tensorSyncB, tensorSyncC, tensorSyncD, tensorSyncE, tensorSyncF }); + std::vector> inputsSyncA; + std::vector> inputsSyncB; + + for (uint32_t i = 0; i < numParallel; i++) { + inputsSyncA.push_back(std::make_shared(kp::Tensor(data))); + inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); + } + + mgr.evalOpDefault(inputsSyncA); + mgr.evalOpDefault(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); - mgr.evalOpDefault>( - { tensorSyncA, tensorSyncB }, std::vector(shader.begin(), shader.end())); + for (uint32_t i = 0; i < numParallel; i++) { + mgr.evalOpDefault>( + { inputsSyncA[i], inputsSyncB[i] }, + std::vector(shader.begin(), shader.end())); - mgr.evalOpDefault>( - { tensorSyncC, tensorSyncD }, std::vector(shader.begin(), shader.end())); + } - mgr.evalOpDefault>( - { tensorSyncE, tensorSyncF }, std::vector(shader.begin(), shader.end())); - - mgr.evalOpDefault({ tensorSyncB, tensorSyncD, tensorSyncF }); + mgr.evalOpDefault(inputsSyncB); auto endSync = std::chrono::high_resolution_clock::now(); auto durationSync = std::chrono::duration_cast(endSync - startSync).count(); - EXPECT_EQ(tensorSyncB->data(), resultSync); - EXPECT_EQ(tensorSyncD->data(), resultSync); - EXPECT_EQ(tensorSyncF->data(), resultSync); + for (uint32_t i = 0; i < numParallel; i++) { + EXPECT_EQ(inputsSyncB[i]->data(), resultSync); + } - //std::shared_ptr tensorAsyncA{ new kp::Tensor(data) }; - //std::shared_ptr tensorAsyncB{ new kp::Tensor(data) }; - //std::shared_ptr tensorAsyncC{ new kp::Tensor(data) }; - //std::shared_ptr tensorAsyncD{ new kp::Tensor(data) }; - //std::shared_ptr tensorAsyncE{ new kp::Tensor(data) }; - //std::shared_ptr tensorAsyncF{ new kp::Tensor(data) }; + kp::Manager mgrAsync(0, numParallel); - //kp::Manager mgrAsync(0, 1); + std::vector> inputsAsyncA; + std::vector> inputsAsyncB; - //mgrAsync.evalOpDefault({ tensorAsyncA, tensorAsyncB, tensorAsyncC, tensorAsyncD, tensorAsyncE, tensorAsyncF }); + for (uint32_t i = 0; i < numParallel; i++) { + inputsAsyncA.push_back(std::make_shared(kp::Tensor(data))); + inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); + } - //mgrAsync.createManagedSequence("async0", 0); - ////mgrAsync.createManagedSequence("async1", 1); - ////mgrAsync.createManagedSequence("async2", 2); + mgrAsync.evalOpDefault(inputsAsyncA); + mgrAsync.evalOpDefault(inputsAsyncB); - //auto startAsync = std::chrono::high_resolution_clock::now(); + for (uint32_t i = 0; i < numParallel; i++) { + mgrAsync.createManagedSequence("async" + std::to_string(i), i); + } - //mgrAsync.evalOpAsync>( - // { tensorAsyncA, tensorAsyncB }, "async0", std::vector(shader.begin(), shader.end())); + auto startAsync = std::chrono::high_resolution_clock::now(); - ////mgrAsync.evalOpAsync>( - //// { tensorAsyncC, tensorAsyncD }, "async1", std::vector(shader.begin(), shader.end())); + for (uint32_t i = 0; i < numParallel; i++) { + mgrAsync.evalOpAsync>( + { inputsAsyncA[i], inputsAsyncB[i] }, + "async" + std::to_string(i), + std::vector(shader.begin(), shader.end())); + } - ////mgrAsync.evalOpAsync>( - //// { tensorAsyncE, tensorAsyncF }, "async2", std::vector(shader.begin(), shader.end())); + for (uint32_t i = 0; i < numParallel; i++) { + mgrAsync.evalOpAwait("async" + std::to_string(i)); + } - //mgrAsync.evalOpAwait("async0"); - ////mgrAsync.evalOpAwait("async1"); - ////mgrAsync.evalOpAwait("async2"); + mgrAsync.evalOpDefault({ inputsAsyncB }); - //mgrAsync.evalOpDefault({ tensorAsyncB }); - ////mgrAsync.evalOpDefault({ tensorAsyncD }); - ////mgrAsync.evalOpDefault({ tensorAsyncF }); + auto endAsync = std::chrono::high_resolution_clock::now(); + auto durationAsync = std::chrono::duration_cast(endAsync - startAsync).count(); - //auto endAsync = std::chrono::high_resolution_clock::now(); - //auto durationAsync = std::chrono::duration_cast(endAsync - startAsync).count(); + for (uint32_t i = 0; i < numParallel; i++) { + EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync); + } - //EXPECT_EQ(tensorAsyncB->data(), resultAsync); - ////EXPECT_EQ(tensorAsyncD->data(), resultAsync); - ////EXPECT_EQ(tensorAsyncF->data(), resultAsync); - - ////SPDLOG_DEBUG("Total Sync: {}", durationSync); - //SPDLOG_DEBUG("Total Async: {}", durationAsync); + SPDLOG_ERROR("Total Sync: {}", durationSync); + SPDLOG_ERROR("Total Async: {}", durationAsync); }