From 6f5a8f8968c980e76202be62acc4f252639006f0 Mon Sep 17 00:00:00 2001 From: alexander-g <3867427+alexander-g@users.noreply.github.com> Date: Sat, 6 Mar 2021 11:45:29 +0100 Subject: [PATCH] support for timestamps --- python/src/main.cpp | 3 +- single_include/kompute/Kompute.hpp | 16 +++++++- src/Manager.cpp | 5 ++- src/Sequence.cpp | 63 +++++++++++++++++++++++++++++- src/include/kompute/Manager.hpp | 4 +- src/include/kompute/Sequence.hpp | 13 +++++- 6 files changed, 96 insertions(+), 8 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 8aac68c98..9f660618e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -129,6 +129,7 @@ PYBIND11_MODULE(kp, m) { .def("is_recording", &kp::Sequence::isRecording) .def("is_running", &kp::Sequence::isRunning) .def("is_init", &kp::Sequence::isInit) + .def("get_timestamps", &kp::Sequence::getTimestamps) .def("clear", &kp::Sequence::clear) .def("destroy", &kp::Sequence::destroy); @@ -136,7 +137,7 @@ PYBIND11_MODULE(kp, m) { .def(py::init()) .def(py::init()) .def(py::init&>()) - .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0) + .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0, py::arg("nrOfTimestamps") = 0) .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 7b67e2024..663d1d6d1 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1527,11 +1527,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device + * @param nrOfTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, - uint32_t queueIndex); + uint32_t queueIndex, + uint32_t nrOfTimestamps = 0); /** * 
Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -1649,6 +1651,12 @@ class Sequence : public std::enable_shared_from_this */ void clear(); + /** + * Return the timestamps that were latched at the beginning and + * after each operation during the last eval() call. + */ + std::vector getTimestamps(); + /** * Begins recording commands for commands to be submitted into the command * buffer. @@ -1706,6 +1714,7 @@ class Sequence : public std::enable_shared_from_this // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; std::vector> mOperations; + std::shared_ptr timestampQueryPool = nullptr; // State bool mRecording = false; @@ -1714,6 +1723,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); + void createTimestampQueryPool(uint32_t); }; } // End namespace kp @@ -1778,9 +1788,11 @@ class Manager * @param sequenceName The name for the named sequence to be retrieved or * created * @param queueIndex The queue to use from the available queues + * @param nrOfTimestamps The maximum number of timestamps to allocate. + * If zero (default), disables latching of timestamps. 
* @return Shared pointer to the manager owned sequence resource
      */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
+    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
 
     /**
      * Function that simplifies the common workflow of tensor creation and
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 38f67de0d..a364eb07e 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -377,7 +377,7 @@ Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
 }
 
 std::shared_ptr<Sequence>
-Manager::sequence(uint32_t queueIndex)
+Manager::sequence(uint32_t queueIndex, uint32_t nrOfTimestamps)
 {
     KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
 
@@ -385,7 +385,8 @@ Manager::sequence(uint32_t queueIndex)
       this->mPhysicalDevice,
       this->mDevice,
       this->mComputeQueues[queueIndex],
-      this->mComputeQueueFamilyIndices[queueIndex]) };
+      this->mComputeQueueFamilyIndices[queueIndex],
+      nrOfTimestamps) };
 
     if (this->mManageResources) {
         this->mManagedSequences.push_back(sq);
diff --git a/src/Sequence.cpp b/src/Sequence.cpp
index fa715cefc..21cbf5af2 100644
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@@ -6,7 +6,8 @@ namespace kp {
 Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                    std::shared_ptr<vk::Device> device,
                    std::shared_ptr<vk::Queue> computeQueue,
-                   uint32_t queueIndex)
+                   uint32_t queueIndex,
+                   uint32_t nrOfTimestamps)
 {
     KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue");
 
@@ -17,6 +18,8 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
 
     this->createCommandPool();
     this->createCommandBuffer();
+    if(nrOfTimestamps>0)
+        this->createTimestampQueryPool(nrOfTimestamps+1); //+1 for the first one
 }
 
 Sequence::~Sequence()
@@ -44,6 +47,16 @@ Sequence::begin()
     KP_LOG_INFO("Kompute Sequence command now started recording");
     this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
     this->mRecording = true;
+
+    // Latch the first timestamp before any commands are recorded. The query is
+    // reset first: Vulkan requires a query to be reset before it is written.
+    if (this->timestampQueryPool) {
+        this->mCommandBuffer->resetQueryPool(*this->timestampQueryPool, 0, 1);
+        this->mCommandBuffer->writeTimestamp(
+          vk::PipelineStageFlagBits::eAllCommands,
+          *this->timestampQueryPool,
+          0);
+    }
 }
 
 void
@@ -261,6 +274,20 @@ Sequence::record(std::shared_ptr<OpBase> op)
 
     this->mOperations.push_back(op);
 
+    if (this->timestampQueryPool) {
+        // NOTE(review): the pool holds nrOfTimestamps + 1 queries, but nothing
+        // here checks the index against that size -- TODO guard extra operations.
+        const auto queryIndex =
+          static_cast<uint32_t>(this->mOperations.size());
+        // Reset right before the write so re-submitting the buffer stays valid.
+        this->mCommandBuffer->resetQueryPool(
+          *this->timestampQueryPool, queryIndex, 1);
+        this->mCommandBuffer->writeTimestamp(
+          vk::PipelineStageFlagBits::eAllCommands,
+          *this->timestampQueryPool,
+          queryIndex);
+    }
+
     return shared_from_this();
 }
 
@@ -308,4 +335,83 @@ Sequence::createCommandBuffer()
     KP_LOG_DEBUG("Kompute Sequence Command Buffer Created");
 }
 
+/**
+ * Creates the query pool that stores the latched timestamps.
+ * If the device does not report timestampComputeAndGraphics, no pool is
+ * created and timestamp latching stays disabled.
+ *
+ * @param query_size Number of queries to allocate in the pool
+ * @throws std::runtime_error if the device or physical device is null
+ */
+void
+Sequence::createTimestampQueryPool(uint32_t query_size)
+{
+    KP_LOG_DEBUG("Kompute Sequence creating query pool");
+    if (!this->mDevice) {
+        throw std::runtime_error("Kompute Sequence device is null");
+    }
+    if (!this->mPhysicalDevice) {
+        throw std::runtime_error("Kompute Sequence physical device is null");
+    }
+
+    const vk::PhysicalDeviceProperties physicalDeviceProperties =
+      this->mPhysicalDevice->getProperties();
+
+    if (!physicalDeviceProperties.limits.timestampComputeAndGraphics) {
+        // Best effort: the pool stays null, so begin()/record() latch nothing
+        // and getTimestamps() throws.
+        KP_LOG_DEBUG("Device does not support timestamps");
+        return;
+    }
+
+    vk::QueryPoolCreateInfo queryPoolInfo;
+    queryPoolInfo.setQueryCount(query_size);
+    queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
+    // NOTE(review): nothing in this patch calls destroyQueryPool on this
+    // handle -- TODO confirm it is released (e.g. in Sequence::destroy()).
+    this->timestampQueryPool = std::make_shared<vk::QueryPool>(
+      this->mDevice->createQueryPool(queryPoolInfo));
+
+    KP_LOG_DEBUG("Query pool for timestamps created");
+}
+
+/**
+ * Reads back the latched timestamps: query 0, written in begin(), followed
+ * by one query per recorded operation.
+ *
+ * The read uses VK_QUERY_RESULT_WAIT_BIT, so it blocks until every requested
+ * query has been written; call it only after the sequence has been evaluated.
+ *
+ * @return The raw 64-bit query results, one entry per latched timestamp
+ * @throws std::runtime_error if timestamp latching is not enabled or
+ *         vkGetQueryPoolResults does not return VK_SUCCESS
+ */
+std::vector<std::uint64_t>
+Sequence::getTimestamps()
+{
+    if (!this->timestampQueryPool) {
+        throw std::runtime_error("Timestamp latching not enabled");
+    }
+
+    std::vector<std::uint64_t> timestamps(this->mOperations.size() + 1, 0);
+
+    // The C entry point is kept because the C++ getQueryPoolResults binding
+    // did not compile for the author. Handles are cast explicitly: vulkan.hpp
+    // only converts them implicitly with VULKAN_HPP_TYPESAFE_CONVERSION.
+    const VkResult result = vkGetQueryPoolResults(
+      static_cast<VkDevice>(*this->mDevice),
+      static_cast<VkQueryPool>(*this->timestampQueryPool),
+      0,
+      static_cast<uint32_t>(timestamps.size()),
+      timestamps.size() * sizeof(std::uint64_t),
+      timestamps.data(),
+      sizeof(std::uint64_t),
+      VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+    if (result != VK_SUCCESS) {
+        throw std::runtime_error("vkGetQueryPoolResults failed");
+    }
+
+    return timestamps;
+}
+
 }
diff
--git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 61212abf2..214da7839 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -64,9 +64,11 @@ class Manager * @param sequenceName The name for the named sequence to be retrieved or * created * @param queueIndex The queue to use from the available queues + * @param nrOfTimestamps The maximum number of timestamps to allocate. + * If zero (default), disables latching of timestamps. * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr sequence(uint32_t queueIndex = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); /** * Function that simplifies the common workflow of tensor creation and diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 5741fb4e6..c25f8a6eb 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -3,6 +3,7 @@ #include "kompute/Core.hpp" #include "kompute/operations/OpBase.hpp" +#include "kompute/operations/OpAlgoDispatch.hpp" namespace kp { @@ -20,11 +21,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device + * @param nrOfTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, - uint32_t queueIndex); + uint32_t queueIndex, + uint32_t nrOfTimestamps = 0); /** * Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -142,6 +145,12 @@ class Sequence : public std::enable_shared_from_this */ void clear(); + /** + * Return the timestamps that were latched at the beginning and + * after each operation during the last eval() call. 
+ */ + std::vector getTimestamps(); + /** * Begins recording commands for commands to be submitted into the command * buffer. @@ -199,6 +208,7 @@ class Sequence : public std::enable_shared_from_this // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; std::vector> mOperations; + std::shared_ptr timestampQueryPool = nullptr; // State bool mRecording = false; @@ -207,6 +217,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); + void createTimestampQueryPool(uint32_t); }; } // End namespace kp