From 6f5a8f8968c980e76202be62acc4f252639006f0 Mon Sep 17 00:00:00 2001 From: alexander-g <3867427+alexander-g@users.noreply.github.com> Date: Sat, 6 Mar 2021 11:45:29 +0100 Subject: [PATCH 1/3] support for timestamps --- python/src/main.cpp | 3 +- single_include/kompute/Kompute.hpp | 16 +++++++- src/Manager.cpp | 5 ++- src/Sequence.cpp | 63 +++++++++++++++++++++++++++++- src/include/kompute/Manager.hpp | 4 +- src/include/kompute/Sequence.hpp | 13 +++++- 6 files changed, 96 insertions(+), 8 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 8aac68c98..9f660618e 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -129,6 +129,7 @@ PYBIND11_MODULE(kp, m) { .def("is_recording", &kp::Sequence::isRecording) .def("is_running", &kp::Sequence::isRunning) .def("is_init", &kp::Sequence::isInit) + .def("get_timestamps", &kp::Sequence::getTimestamps) .def("clear", &kp::Sequence::clear) .def("destroy", &kp::Sequence::destroy); @@ -136,7 +137,7 @@ PYBIND11_MODULE(kp, m) { .def(py::init()) .def(py::init()) .def(py::init&>()) - .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0) + .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0, py::arg("nrOfTimestamps") = 0) .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 7b67e2024..663d1d6d1 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1527,11 +1527,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device + * @param nrOfTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, - uint32_t queueIndex); + uint32_t queueIndex, + uint32_t nrOfTimestamps = 0); /** * Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -1649,6 +1651,12 @@ class Sequence : public std::enable_shared_from_this */ void clear(); + /** + * Return the timestamps that were latched at the beginning and + * after each operation during the last eval() call. + */ + std::vector getTimestamps(); + /** * Begins recording commands for commands to be submitted into the command * buffer. @@ -1706,6 +1714,7 @@ class Sequence : public std::enable_shared_from_this // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; std::vector> mOperations; + std::shared_ptr timestampQueryPool = nullptr; // State bool mRecording = false; @@ -1714,6 +1723,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); + void createTimestampQueryPool(uint32_t); }; } // End namespace kp @@ -1778,9 +1788,11 @@ class Manager * @param sequenceName The name for the named sequence to be retrieved or * created * @param queueIndex The queue to use from the available queues + * @param nrOfTimestamps The maximum number of timestamps to allocate. + * If zero (default), disables latching of timestamps. * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr sequence(uint32_t queueIndex = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); /** * Function that simplifies the common workflow of tensor creation and diff --git a/src/Manager.cpp b/src/Manager.cpp index 38f67de0d..a364eb07e 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -377,7 +377,7 @@ Manager::algorithm(const std::vector>& tensors, } std::shared_ptr -Manager::sequence(uint32_t queueIndex) +Manager::sequence(uint32_t queueIndex, uint32_t nrOfTimestamps) { KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex); @@ -385,7 +385,8 @@ Manager::sequence(uint32_t queueIndex) this->mPhysicalDevice, this->mDevice, this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]) }; + this->mComputeQueueFamilyIndices[queueIndex], + nrOfTimestamps) }; if (this->mManageResources) { this->mManagedSequences.push_back(sq); diff --git a/src/Sequence.cpp b/src/Sequence.cpp index fa715cefc..21cbf5af2 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -6,7 +6,8 @@ namespace kp { Sequence::Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, - uint32_t queueIndex) + uint32_t queueIndex, + uint32_t nrOfTimestamps) { KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue"); @@ -17,6 +18,8 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->createCommandPool(); this->createCommandBuffer(); + if(nrOfTimestamps>0) + this->createTimestampQueryPool(nrOfTimestamps+1); //+1 for the first one } Sequence::~Sequence() @@ -44,6 +47,13 @@ Sequence::begin() KP_LOG_INFO("Kompute Sequence command now started recording"); this->mCommandBuffer->begin(vk::CommandBufferBeginInfo()); this->mRecording = true; + + //latch the first timestamp before any commands are submitted + if(this->timestampQueryPool) + this->mCommandBuffer->writeTimestamp( + vk::PipelineStageFlagBits::eAllCommands, + *this->timestampQueryPool, 0 + ); } void @@ -261,6 +271,12 @@ Sequence::record(std::shared_ptr op) this->mOperations.push_back(op); + if(this->timestampQueryPool) + this->mCommandBuffer->writeTimestamp( + vk::PipelineStageFlagBits::eAllCommands, + *this->timestampQueryPool, this->mOperations.size() + ); + return shared_from_this(); } @@ -308,4 +324,49 @@ Sequence::createCommandBuffer() KP_LOG_DEBUG("Kompute Sequence Command Buffer Created"); } +void +Sequence::createTimestampQueryPool(uint32_t query_size) +{ + KP_LOG_DEBUG("Kompute Sequence creating query pool"); + if (!this->mDevice) { + throw std::runtime_error("Kompute Sequence device is null"); + } + if (!this->mPhysicalDevice) { + throw std::runtime_error("Kompute Sequence physical device is null"); + } + + vk::PhysicalDeviceProperties physicalDeviceProperties = + this->mPhysicalDevice->getProperties(); + + if(physicalDeviceProperties.limits.timestampComputeAndGraphics){ + vk::QueryPoolCreateInfo queryPoolInfo; + queryPoolInfo.setQueryCount(query_size); + queryPoolInfo.setQueryType(vk::QueryType::eTimestamp); + this->timestampQueryPool = std::make_shared(this->mDevice->createQueryPool(queryPoolInfo)); + + KP_LOG_DEBUG("Query pool for timestamps created"); + } + else{ + KP_LOG_DEBUG("Device does not support timestamps"); + } +} + +std::vector +Sequence::getTimestamps(){ + if(!this->timestampQueryPool) + throw std::runtime_error("Timestamp latching not enabled"); + + const auto n = this->mOperations.size()+1; + std::vector timestamps(n, 0); + //XXX: the C++ method this->mDevice->getQueryPoolResults does not compile for me + const VkResult result = + vkGetQueryPoolResults(*this->mDevice, *this->timestampQueryPool, + 0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(), + sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + if(result!=VK_SUCCESS) + throw std::runtime_error("vkGetQueryPoolResults failed"); + + return timestamps; +} + } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 61212abf2..214da7839 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -64,9 +64,11 @@ class Manager * @param sequenceName The name for the named sequence to be retrieved or * created * @param queueIndex The queue to use from the available queues + * @param nrOfTimestamps The maximum number of timestamps to allocate. + * If zero (default), disables latching of timestamps. * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr sequence(uint32_t queueIndex = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); /** * Function that simplifies the common workflow of tensor creation and diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 5741fb4e6..c25f8a6eb 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -3,6 +3,7 @@ #include "kompute/Core.hpp" #include "kompute/operations/OpBase.hpp" +#include "kompute/operations/OpAlgoDispatch.hpp" namespace kp { @@ -20,11 +21,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device + * @param nrOfTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, - uint32_t queueIndex); + uint32_t queueIndex, + uint32_t nrOfTimestamps = 0); /** * Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -142,6 +145,12 @@ class Sequence : public std::enable_shared_from_this */ void clear(); + /** + * Return the timestamps that were latched at the beginning and + * after each operation during the last eval() call. + */ + std::vector getTimestamps(); + /** * Begins recording commands for commands to be submitted into the command * buffer. @@ -199,6 +208,7 @@ class Sequence : public std::enable_shared_from_this // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; std::vector> mOperations; + std::shared_ptr timestampQueryPool = nullptr; // State bool mRecording = false; @@ -207,6 +217,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); + void createTimestampQueryPool(uint32_t); }; } // End namespace kp From 6da6bca339811c53499e637fe7b1aac76b1145d5 Mon Sep 17 00:00:00 2001 From: alexander-g <3867427+alexander-g@users.noreply.github.com> Date: Sun, 7 Mar 2021 11:35:20 +0100 Subject: [PATCH 2/3] requested changes --- python/src/main.cpp | 2 +- single_include/kompute/Kompute.hpp | 126 ++++++++++++++++++++--------- src/Manager.cpp | 4 +- src/Sequence.cpp | 47 ++++++----- src/include/kompute/Sequence.hpp | 6 +- 5 files changed, 123 insertions(+), 62 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index a6a03562b..7165d41e7 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -140,7 +140,7 @@ PYBIND11_MODULE(kp, m) { py::arg("device") = 0, py::arg("family_queue_indices") = std::vector(), py::arg("desired_extensions") = std::vector()) - .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0, py::arg("nrOfTimestamps") = 0) + .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 0ae6cce5a..38213bb6e 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -820,12 +820,14 @@ class Tensor }; /** - * Default constructor with data provided which would be used to create the + * Constructor with data provided which would be used to create the * respective vulkan buffer and memory. * + * @param physicalDevice The physical device to use to fetch properties + * @param device The device to use to create the buffer and memory from * @param data Non-zero-sized vector of data that will be used by the * tensor - * @param tensorType Type for the tensor which is of type TensorTypes + * @param tensorTypes Type for the tensor which is of type TensorTypes */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -839,10 +841,11 @@ class Tensor ~Tensor(); /** - * Initialiser which calls the initialisation for all the respective tensors - * as well as creates the respective staging tensors. The staging tensors - * would only be created for the tensors of type TensorType::eDevice as - * otherwise there is no need to copy from host memory. + * Function to trigger reinitialisation of the tensor buffer and memory with + * new data as well as new potential device type. + * + * @param data Vector of data to use to initialise vector from + * @param tensorType The type to use for the tensor */ void rebuild(const std::vector& data, TensorTypes tensorType = TensorTypes::eDevice); @@ -852,6 +855,11 @@ class Tensor */ void destroy(); + /** + * Check whether tensor is initialized based on the created gpu resources. + * + * @returns Boolean stating whether tensor is initialized + */ bool isInit(); /** @@ -1210,6 +1218,8 @@ class OpBase * The record function is intended to only send a record command or run * commands that are expected to record operations that are to be submitted * as a batch into the GPU. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void record(const vk::CommandBuffer& commandBuffer) = 0; @@ -1220,6 +1230,8 @@ class OpBase * there are situations where eval can be called multiple times, so the * resources that are created should be idempotent in case it's called multiple * times in a row. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0; @@ -1230,6 +1242,8 @@ class OpBase * there are situations where eval can be called multiple times, so the * resources that are destroyed should not require a re-init unless explicitly * provided by the user. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0; }; @@ -1239,38 +1253,47 @@ class OpBase namespace kp { /** - Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type + * Operation that copies the data from the first tensor to the rest of the tensors + * provided, using a record command for all the vectors. This operation does not + * own/manage the memory of the tensors passed to it. The operation must only + * receive tensors of type */ class OpTensorCopy : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorCopy(const std::vector>& tensors); /** - * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. */ ~OpTensorCopy() override; /** - * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier. + * Records the copy commands from the first tensor into all the other + * tensors provided. Also optionally records a barrier. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * Copies the local vectors for all the tensors to sync the data with the gpu. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; @@ -1284,17 +1307,20 @@ class OpTensorCopy : public OpBase namespace kp { /** - Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. + * Operation that syncs tensor's device by mapping local data into the device memory. + * For TensorTypes::eDevice it will use a record operation for the memory to be syncd + * into GPU memory which means that the operation will be done in sync with GPU commands. + * For TensorTypes::eHost it will only map the data into host memory which will + * happen during preEval before the recorded commands are dispatched. */ class OpTensorSyncDevice : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. The tensos provided cannot + * be of type TensorTypes::eStorage. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorSyncDevice(const std::vector>& tensors); @@ -1305,17 +1331,24 @@ class OpTensorSyncDevice : public OpBase ~OpTensorSyncDevice() override; /** - * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. + * For device tensors, it records the copy command for the tensor to copy the + * data from its staging to device memory. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; @@ -1329,38 +1362,50 @@ class OpTensorSyncDevice : public OpBase namespace kp { /** - Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. + * Operation that syncs tensor's local memory by mapping device data into the + * local CPU memory. For TensorTypes::eDevice it will use a record operation + * for the memory to be syncd into GPU memory which means that the operation + * will be done in sync with GPU commands. For TensorTypes::eHost it will + * only map the data into host memory which will happen during preEval before + * the recorded commands are dispatched. */ class OpTensorSyncLocal : public OpBase { public: /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. + * Default constructor with parameters that provides the core vulkan resources + * and the tensors that will be used in the operation. The tensors provided + * cannot be of type TensorTypes::eStorage. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that will be used to create in operation. */ OpTensorSyncLocal(const std::vector>& tensors); /** - * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + * Default destructor. This class does not manage memory so it won't be expecting + * the parent to perform a release. */ ~OpTensorSyncLocal() override; /** - * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory. + * For device tensors, it records the copy command for the tensor to copy the + * data from its device to staging memory. + * + * @param commandBuffer The command buffer to record the command into. */ void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** * For host tensors it performs the map command from the host memory into local memory. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; @@ -1383,6 +1428,13 @@ class OpAlgoDispatch : public OpBase { public: + /** + * Constructor that stores the algorithm to use as well as the relevant + * push constants to override when recording. + * + * @param algorithm The algorithm object to use for dispatch + * @param pushConstants The push constants to use for override + */ OpAlgoDispatch(const std::shared_ptr& algorithm, const kp::Constants& pushConstants = {}); @@ -1399,18 +1451,22 @@ class OpAlgoDispatch : public OpBase * shader processing to the gpu. This function also records the GPU memory * copy of the output data for the staging buffer so it can be read by the * host. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void record(const vk::CommandBuffer& commandBuffer) override; /** * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void preEval(const vk::CommandBuffer& commandBuffer) override; /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. + * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. */ virtual void postEval(const vk::CommandBuffer& commandBuffer) override; @@ -1439,11 +1495,9 @@ class OpMult : public OpAlgoDispatch * requirements for the operations to be able to create and manage their * sub-components. * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param komputeWorkgroup Optional parameter to specify the layout for processing + * @param algorithm An algorithm that will be overridden with the OpMult + * shader data and the tensors provided which are expected to be 3 */ OpMult(std::vector> tensors, std::shared_ptr algorithm) : OpAlgoDispatch(algorithm) @@ -1489,13 +1543,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device - * @param nrOfTimestamps Maximum number of timestamps to allocate + * @param totalTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, uint32_t queueIndex, - uint32_t nrOfTimestamps = 0); + uint32_t totalTimestamps = 0); /** * Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -1754,7 +1808,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); - void createTimestampQueryPool(uint32_t); + void createTimestampQueryPool(uint32_t totalTimestamps); }; } // End namespace kp diff --git a/src/Manager.cpp b/src/Manager.cpp index 563e102bf..d6743739c 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -431,7 +431,7 @@ Manager::algorithm(const std::vector>& tensors, } std::shared_ptr -Manager::sequence(uint32_t queueIndex, uint32_t nrOfTimestamps) +Manager::sequence(uint32_t queueIndex, uint32_t total_timestamps) { KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex); @@ -440,7 +440,7 @@ Manager::sequence(uint32_t queueIndex, uint32_t nrOfTimestamps) this->mDevice, this->mComputeQueues[queueIndex], this->mComputeQueueFamilyIndices[queueIndex], - nrOfTimestamps) }; + total_timestamps) }; if (this->mManageResources) { this->mManagedSequences.push_back(sq); diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 21cbf5af2..6e379eb92 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -7,7 +7,7 @@ Sequence::Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, uint32_t queueIndex, - uint32_t nrOfTimestamps) + uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue"); @@ -18,8 +18,8 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->createCommandPool(); this->createCommandBuffer(); - if(nrOfTimestamps>0) - this->createTimestampQueryPool(nrOfTimestamps+1); //+1 for the first one + if(totalTimestamps>0) + this->createTimestampQueryPool(totalTimestamps+1); //+1 for the first one } Sequence::~Sequence() @@ -246,6 +246,16 @@ Sequence::destroy() this->mOperations.clear(); } + if(this->timestampQueryPool){ + KP_LOG_INFO("Destroying QueryPool"); + this->mDevice->destroy( + *this->timestampQueryPool, + (vk::Optional)nullptr); + + this->timestampQueryPool = nullptr; + KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool"); + } + if (this->mDevice) { this->mDevice = nullptr; } @@ -325,11 +335,11 @@ Sequence::createCommandBuffer() } void -Sequence::createTimestampQueryPool(uint32_t query_size) +Sequence::createTimestampQueryPool(uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Sequence creating query pool"); - if (!this->mDevice) { - throw std::runtime_error("Kompute Sequence device is null"); + if (!this->isInit()) { + throw std::runtime_error("createTimestampQueryPool() called on uninitialized Sequence"); } if (!this->mPhysicalDevice) { throw std::runtime_error("Kompute Sequence physical device is null"); @@ -339,32 +349,29 @@ Sequence::createTimestampQueryPool(uint32_t query_size) this->mPhysicalDevice->getProperties(); if(physicalDeviceProperties.limits.timestampComputeAndGraphics){ - vk::QueryPoolCreateInfo queryPoolInfo; - queryPoolInfo.setQueryCount(query_size); - queryPoolInfo.setQueryType(vk::QueryType::eTimestamp); - this->timestampQueryPool = std::make_shared(this->mDevice->createQueryPool(queryPoolInfo)); + vk::QueryPoolCreateInfo queryPoolInfo; + queryPoolInfo.setQueryCount(totalTimestamps); + queryPoolInfo.setQueryType(vk::QueryType::eTimestamp); + this->timestampQueryPool = std::make_shared(this->mDevice->createQueryPool(queryPoolInfo)); - KP_LOG_DEBUG("Query pool for timestamps created"); + KP_LOG_DEBUG("Query pool for timestamps created"); } else{ - KP_LOG_DEBUG("Device does not support timestamps"); + throw std::runtime_error("Device does not support timestamps"); } } std::vector -Sequence::getTimestamps(){ +Sequence::getTimestamps() +{ if(!this->timestampQueryPool) throw std::runtime_error("Timestamp latching not enabled"); const auto n = this->mOperations.size()+1; std::vector timestamps(n, 0); - //XXX: the C++ method this->mDevice->getQueryPoolResults does not compile for me - const VkResult result = - vkGetQueryPoolResults(*this->mDevice, *this->timestampQueryPool, - 0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(), - sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - if(result!=VK_SUCCESS) - throw std::runtime_error("vkGetQueryPoolResults failed"); + this->mDevice->getQueryPoolResults(*this->timestampQueryPool, + 0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(), + sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); return timestamps; } diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 502720f72..d29f6aaf0 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -21,13 +21,13 @@ class Sequence : public std::enable_shared_from_this * @param device Vulkan logical device * @param computeQueue Vulkan compute queue * @param queueIndex Vulkan compute queue index in device - * @param nrOfTimestamps Maximum number of timestamps to allocate + * @param totalTimestamps Maximum number of timestamps to allocate */ Sequence(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr computeQueue, uint32_t queueIndex, - uint32_t nrOfTimestamps = 0); + uint32_t totalTimestamps = 0); /** * Destructor for sequence which is responsible for cleaning all subsequent * owned operations. @@ -286,7 +286,7 @@ class Sequence : public std::enable_shared_from_this // Create functions void createCommandPool(); void createCommandBuffer(); - void createTimestampQueryPool(uint32_t); + void createTimestampQueryPool(uint32_t totalTimestamps); }; } // End namespace kp From 259d3f1d13604cf639f7ea034b1fa2b69e93623f Mon Sep 17 00:00:00 2001 From: alexander-g <3867427+alexander-g@users.noreply.github.com> Date: Sun, 7 Mar 2021 14:16:50 +0100 Subject: [PATCH 3/3] test case --- Makefile | 2 +- src/Manager.cpp | 4 ++-- src/include/kompute/Manager.hpp | 2 +- test/TestSequence.cpp | 30 ++++++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 872209015..9fdcbdcbe 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsyst VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake" # Regext to pass to catch2 to filter tests -FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution" +FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps" ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" diff --git a/src/Manager.cpp b/src/Manager.cpp index d6743739c..e3bdbb2d9 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -431,7 +431,7 @@ Manager::algorithm(const std::vector>& tensors, } std::shared_ptr -Manager::sequence(uint32_t queueIndex, uint32_t total_timestamps) +Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex); @@ -440,7 +440,7 @@ Manager::sequence(uint32_t queueIndex, uint32_t total_timestamps) this->mDevice, this->mComputeQueues[queueIndex], this->mComputeQueueFamilyIndices[queueIndex], - total_timestamps) }; + totalTimestamps) }; if (this->mManageResources) { this->mManagedSequences.push_back(sq); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index db99c6d1e..d9c6ddf3e 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -64,7 +64,7 @@ class Manager * If zero (default), disables latching of timestamps. * @returns Shared pointer with initialised sequence */ - std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0); /** * Create a managed tensor that will be destroyed by this manager diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 482868a88..b8afd1ad6 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -100,3 +100,33 @@ TEST(TestSequence, RerecordSequence) EXPECT_EQ(tensorB->data(), std::vector({2, 8, 18})); } + + +TEST(TestSequence, SequenceTimestamps) +{ + kp::Manager mgr; + + std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); + + std::string shader(R"( + #version 450 + layout (local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = pa[index] + 1; + })"); + + std::vector spirv = kp::Shader::compile_source(shader); + + auto seq = mgr.sequence(0, 100); //100 timestamps + seq->record({ tensorA }) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record({ tensorA }) + ->eval(); + const std::vector timestamps = seq->getTimestamps(); + + EXPECT_EQ(timestamps.size(), 6); //1 timestamp at start + 1 after each operation +}