diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 7f2c63a44..6fd9773e2 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -118,38 +118,74 @@ Sequence::end() bool Sequence::eval() { - SPDLOG_DEBUG("Kompute sequence compute recording EVAL"); + SPDLOG_DEBUG("Kompute sequence EVAL BEGIN"); - if (this->isRecording()) { - SPDLOG_WARN("Kompute Sequence eval called when still recording"); + bool evalResult = this->evalAsync(); + if (!evalResult) { + SPDLOG_DEBUG("Kompute sequence EVAL FAILURE"); return false; } + evalResult = this->evalAwait(); + + SPDLOG_DEBUG("Kompute sequence EVAL SUCCESS"); + + return evalResult; +} + +bool +Sequence::evalAsync() +{ + if (this->isRecording()) { + SPDLOG_WARN("Kompute Sequence evalAsync called when still recording"); + return false; + } + if (this->mEvalBusy) { + SPDLOG_WARN("Kompute Sequence evalAsync called when an eval async was called without successful wait"); + return false; + } + + this->mEvalBusy = true; + for (size_t i = 0; i < this->mOperations.size(); i++) { this->mOperations[i]->preEval(); } const vk::PipelineStageFlags waitStageMask = - vk::PipelineStageFlagBits::eTransfer; + vk::PipelineStageFlagBits::eTransfer & vk::PipelineStageFlagBits::eComputeShader; vk::SubmitInfo submitInfo( 0, nullptr, &waitStageMask, 1, this->mCommandBuffer.get()); - vk::Fence fence = this->mDevice->createFence(vk::FenceCreateInfo()); + this->mFence = this->mDevice->createFence(vk::FenceCreateInfo()); SPDLOG_DEBUG( "Kompute sequence submitting command buffer into compute queue"); - this->mComputeQueue->submit(1, &submitInfo, fence); - this->mDevice->waitForFences(1, &fence, VK_TRUE, UINT64_MAX); - this->mDevice->destroy(fence); + this->mComputeQueue->submit(1, &submitInfo, this->mFence); +} + +bool +Sequence::evalAwait(uint64_t waitFor) +{ + if (!this->mEvalBusy) { + SPDLOG_WARN("Kompute Sequence evalAwait called without existing eval"); + return false; + } + + vk::Result result = this->mDevice->waitForFences(1, &this->mFence, VK_TRUE, waitFor); + this->mDevice->destroy(this->mFence); + + if (result == vk::Result::eTimeout) { + SPDLOG_WARN("Kompute Sequence evalAwait timed out"); + this->mEvalBusy = false; + return false; + } for (size_t i = 0; i < this->mOperations.size(); i++) { this->mOperations[i]->postEval(); } - SPDLOG_DEBUG("Kompute sequence EVAL success"); - - return true; + this->mEvalBusy = false; } bool diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index e2c7832fd..70eea2e73 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -96,6 +96,63 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); } + /** + * Operation that adds extra operations to existing or new created + * sequences. + * + * @param tensors The tensors to be used in the operation recorded + * @param sequenceName The name of the sequence to be retrieved or created + * @param TArgs Template parameters that will be used to initialise + * Operation to allow for extensible configurations on initialisation + */ + template + void evalOpAsync(std::vector> tensors, + std::string sequenceName, + TArgs&&... params) + { + SPDLOG_DEBUG("Kompute Manager evalOp triggered"); + std::weak_ptr sqWeakPtr = + this->getOrCreateManagedSequence(sequenceName); + + if (std::shared_ptr sq = sqWeakPtr.lock()) { + SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); + sq->begin(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); + sq->record(tensors, std::forward(params)...); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); + sq->end(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); + sq->evalAsync(); + } + SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); + } + + /** + * Operation that adds extra operations to existing or new created + * sequences. + * + * @param tensors The tensors to be used in the operation recorded + * @param sequenceName The name of the sequence to be retrieved or created + * @param TArgs Template parameters that will be used to initialise + * Operation to allow for extensible configurations on initialisation + */ + template + void evalOpAwait(std::string sequenceName, uint64_t waitFor = UINT64_MAX) + { + SPDLOG_DEBUG("Kompute Manager evalOpAwait triggered"); + std::weak_ptr sqWeakPtr = + this->getOrCreateManagedSequence(sequenceName); + + if (std::shared_ptr sq = sqWeakPtr.lock()) { + SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence EVAL AWAIT"); + sq->evalAwait(waitFor); + } + SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS"); + } + /** * Operation that adds extra operations to existing or new created * sequences. diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 6e4a4ab4f..749fdbeeb 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -45,19 +45,42 @@ class Sequence /** * Begins recording commands for commands to be submitted into the command * buffer. + * + * @return Boolean stating whether execution was successful. */ bool begin(); + /** * Ends the recording and stops recording commands when the record command * is sent. + * + * @return Boolean stating whether execution was successful. */ bool end(); + /** * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. + * + * @return Boolean stating whether execution was successful. */ bool eval(); + /** + * Eval Async sends all the recorded and stored operations in the vector of operations into the gpu as a submit job with a barrier. EvalAwait() must be called after to ensure the sequence is terminated correctly. + * + * @return Boolean stating whether execution was successful. + */ + bool evalAsync(); + + /** + * Eval Await waits for the fence to finish processing and then once it finishes, it runs the postEval of all operations. + * + * @param waitFor Number of milliseconds to wait before timing out. + * @return Boolean stating whether execution was successful. + */ + bool evalAwait(uint64_t waitFor = UINT64_MAX); + /** * Returns true if the sequence is currently in recording activated. * @@ -134,12 +157,14 @@ class Sequence std::shared_ptr mCommandBuffer = nullptr; bool mFreeCommandBuffer = false; - // Base op objects + // -------------- ALWAYS OWNED RESOURCES + vk::Fence mFence; std::vector> mOperations; // State bool mIsInit = false; bool mRecording = false; + bool mEvalBusy = false; // Create functions void createCommandPool();