diff --git a/Makefile b/Makefile
index 872209015..9fdcbdcbe 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ VCPKG_WIN_PATH ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsyst
 VCPKG_UNIX_PATH ?= "/c/Users/axsau/Programming/lib/vcpkg/scripts/buildsystems/vcpkg.cmake"
 
 # Regext to pass to catch2 to filter tests
-FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution"
+FILTER_TESTS ?= "-TestAsyncOperations.TestManagerParallelExecution:TestSequence.SequenceTimestamps"
 
 ifeq ($(OS),Windows_NT)     # is Windows_NT on XP, 2000, 7, Vista, 10...
 	CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe"
diff --git a/python/src/main.cpp b/python/src/main.cpp
index f13347aa8..7165d41e7 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -129,6 +129,7 @@ PYBIND11_MODULE(kp, m) {
         .def("is_recording", &kp::Sequence::isRecording)
         .def("is_running", &kp::Sequence::isRunning)
         .def("is_init", &kp::Sequence::isInit)
+        .def("get_timestamps", &kp::Sequence::getTimestamps)
         .def("clear", &kp::Sequence::clear)
         .def("destroy", &kp::Sequence::destroy);
 
@@ -139,7 +140,7 @@ PYBIND11_MODULE(kp, m) {
                 py::arg("device") = 0,
                 py::arg("family_queue_indices") = std::vector<uint32_t>(),
                 py::arg("desired_extensions") = std::vector<std::string>())
-        .def("sequence", &kp::Manager::sequence, py::arg("queueIndex") = 0)
+        .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0)
         .def("tensor", [np](kp::Manager& self,
                             const py::array_t<float> data,
                             kp::Tensor::TensorTypes tensor_type) {
diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 607928f0c..38213bb6e 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -820,12 +820,14 @@ class Tensor
     };
 
     /**
-     *  Default constructor with data provided which would be used to create the
+     *  Constructor with data provided which would be used to create the
      * respective vulkan buffer and memory.
      *
+     *  @param physicalDevice The physical device to use to fetch properties
+     *  @param device The device to use to create the buffer and memory from
      *  @param data Non-zero-sized vector of data that will be used by the
      * tensor
-     *  @param tensorType Type for the tensor which is of type TensorTypes
+     *  @param tensorTypes Type for the tensor which is of type TensorTypes
      */
     Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
            std::shared_ptr<vk::Device> device,
@@ -839,10 +841,11 @@ class Tensor
     ~Tensor();
 
     /**
-     * Initialiser which calls the initialisation for all the respective tensors
-     * as well as creates the respective staging tensors. The staging tensors
-     * would only be created for the tensors of type TensorType::eDevice as
-     * otherwise there is no need to copy from host memory.
+     * Function to trigger reinitialisation of the tensor buffer and memory with
+     * new data as well as new potential device type.
+     *
+     * @param data Vector of data to use to initialise vector from
+     * @param tensorType The type to use for the tensor
      */
     void rebuild(const std::vector<float>& data,
                  TensorTypes tensorType = TensorTypes::eDevice);
@@ -852,6 +855,11 @@ class Tensor
      */
     void destroy();
 
+    /**
+     * Check whether tensor is initialized based on the created gpu resources.
+     *
+     * @returns Boolean stating whether tensor is initialized
+     */
     bool isInit();
 
     /**
@@ -1210,6 +1218,8 @@ class OpBase
      * The record function is intended to only send a record command or run
      * commands that are expected to record operations that are to be submitted
      * as a batch into the GPU.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
 
@@ -1220,6 +1230,8 @@ class OpBase
      * there are situations where eval can be called multiple times, so the 
      * resources that are created should be idempotent in case it's called multiple
      * times in a row.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
 
@@ -1230,6 +1242,8 @@ class OpBase
      * there are situations where eval can be called multiple times, so the 
      * resources that are destroyed should not require a re-init unless explicitly
      * provided by the user.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
 };
@@ -1239,38 +1253,47 @@ class OpBase
 namespace kp {
 
 /**
-    Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type 
+ * Operation that copies the data from the first tensor to the rest of the tensors 
+ * provided, using a record command for all the vectors. This operation does not 
+ * own/manage the memory of the tensors passed to it. The operation must only 
+ * receive tensors of type 
 */
 class OpTensorCopy : public OpBase
 {
   public:
     /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation.
      *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
      * @param tensors Tensors that will be used to create in operation.
      */
     OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
 
     /**
-     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
+     * Default destructor. This class does not manage memory so it won't be 
+     * expecting the parent to perform a release.
      */
     ~OpTensorCopy() override;
 
     /**
-     * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
+     * Records the copy commands from the first tensor into all the other 
+     * tensors provided. Also optionally records a barrier.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     void record(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Copies the local vectors for all the tensors to sync the data with the gpu.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
 
@@ -1284,17 +1307,20 @@ class OpTensorCopy : public OpBase
 namespace kp {
 
 /**
-    Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
+ * Operation that syncs tensor's device by mapping local data into the device memory. 
+ * For TensorTypes::eDevice it will use a record operation for the memory to be syncd 
+ * into GPU memory which means that the operation will be done in sync with GPU commands. 
+ * For TensorTypes::eHost it will only map the data into host memory which will 
+ * happen during preEval before the recorded commands are dispatched.
 */
 class OpTensorSyncDevice : public OpBase
 {
   public:
     /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation. The tensors provided cannot 
+     * be of type TensorTypes::eStorage.
      *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
      * @param tensors Tensors that will be used to create in operation.
      */
     OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
@@ -1305,17 +1331,24 @@ class OpTensorSyncDevice : public OpBase
     ~OpTensorSyncDevice() override;
 
     /**
-     * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
+     * For device tensors, it records the copy command for the tensor to copy the 
+     * data from its staging to device memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     void record(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Does not perform any postEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
 
@@ -1329,38 +1362,50 @@ class OpTensorSyncDevice : public OpBase
 namespace kp {
 
 /**
-    Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
+ * Operation that syncs tensor's local memory by mapping device data into the 
+ * local CPU memory. For TensorTypes::eDevice it will use a record operation 
+ * for the memory to be syncd into GPU memory which means that the operation 
+ * will be done in sync with GPU commands. For TensorTypes::eHost it will 
+ * only map the data into host memory which will happen during preEval before 
+ * the recorded commands are dispatched.
 */
 class OpTensorSyncLocal : public OpBase
 {
   public:
     /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation. The tensors provided 
+     * cannot be of type TensorTypes::eStorage.
      *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
      * @param tensors Tensors that will be used to create in operation.
      */
     OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
 
     /**
-     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
+     * Default destructor. This class does not manage memory so it won't be expecting 
+     * the parent to perform a release.
      */
     ~OpTensorSyncLocal() override;
 
     /**
-     * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
+     * For device tensors, it records the copy command for the tensor to copy the 
+     * data from its device to staging memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     void record(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * For host tensors it performs the map command from the host memory into local memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
 
@@ -1383,6 +1428,13 @@ class OpAlgoDispatch : public OpBase
 {
   public:
 
+    /**
+     * Constructor that stores the algorithm to use as well as the relevant
+     * push constants to override when recording.
+     *
+     * @param algorithm The algorithm object to use for dispatch
+     * @param pushConstants The push constants to use for override
+     */
     OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
             const kp::Constants& pushConstants = {});
 
@@ -1399,18 +1451,22 @@ class OpAlgoDispatch : public OpBase
      * shader processing to the gpu. This function also records the GPU memory
      * copy of the output data for the staging buffer so it can be read by the
      * host.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void record(const vk::CommandBuffer& commandBuffer) override;
 
     /**
      * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
 
     /**
-     * Executes after the recorded commands are submitted, and performs a copy
-     * of the GPU Device memory into the staging buffer so the output data can
-     * be retrieved.
+     * Does not perform any postEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
      */
     virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
 
@@ -1439,11 +1495,9 @@ class OpMult : public OpAlgoDispatch
      * requirements for the operations to be able to create and manage their
      * sub-components.
      *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
      * @param tensors Tensors that are to be used in this operation
-     * @param komputeWorkgroup Optional parameter to specify the layout for processing
+     * @param algorithm An algorithm that will be overridden with the OpMult
+     * shader data and the tensors provided which are expected to be 3
      */
     OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
         : OpAlgoDispatch(algorithm)
@@ -1489,11 +1543,13 @@ class Sequence : public std::enable_shared_from_this<Sequence>
      * @param device Vulkan logical device
      * @param computeQueue Vulkan compute queue
      * @param queueIndex Vulkan compute queue index in device
+     * @param totalTimestamps Maximum number of timestamps to allocate
      */
     Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
              std::shared_ptr<vk::Device> device,
              std::shared_ptr<vk::Queue> computeQueue,
-             uint32_t queueIndex);
+             uint32_t queueIndex,
+             uint32_t totalTimestamps = 0);
     /**
      * Destructor for sequence which is responsible for cleaning all subsequent
      * owned operations.
@@ -1669,6 +1725,12 @@ class Sequence : public std::enable_shared_from_this<Sequence>
      */
     void clear();
 
+    /**
+     * Return the timestamps that were latched at the beginning and
+     * after each operation during the last eval() call.
+     */
+    std::vector<std::uint64_t> getTimestamps();
+
     /**
      * Begins recording commands for commands to be submitted into the command
      * buffer.
@@ -1737,6 +1799,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     // -------------- ALWAYS OWNED RESOURCES
     vk::Fence mFence;
     std::vector<std::shared_ptr<OpBase>> mOperations;
+    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
 
     // State
     bool mRecording = false;
@@ -1745,6 +1808,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     // Create functions
     void createCommandPool();
     void createCommandBuffer();
+    void createTimestampQueryPool(uint32_t totalTimestamps);
 };
 
 } // End namespace kp
@@ -1805,9 +1869,11 @@ class Manager
      * if it hasn't been destroyed by its reference count going to zero.
      *
      * @param queueIndex The queue to use from the available queues
+     * @param nrOfTimestamps The maximum number of timestamps to allocate.
+     * If zero (default), disables latching of timestamps.
      * @returns Shared pointer with initialised sequence
      */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
+    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
 
     /**
      * Create a managed tensor that will be destroyed by this manager
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 83676f9ec..e3bdbb2d9 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -431,7 +431,7 @@ Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
 }
 
 std::shared_ptr<Sequence>
-Manager::sequence(uint32_t queueIndex)
+Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
 {
     KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
 
@@ -439,7 +439,8 @@ Manager::sequence(uint32_t queueIndex)
       this->mPhysicalDevice,
       this->mDevice,
       this->mComputeQueues[queueIndex],
-      this->mComputeQueueFamilyIndices[queueIndex]) };
+      this->mComputeQueueFamilyIndices[queueIndex],
+      totalTimestamps) };
 
     if (this->mManageResources) {
         this->mManagedSequences.push_back(sq);
diff --git a/src/Sequence.cpp b/src/Sequence.cpp
index fa715cefc..6e379eb92 100644
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@@ -6,7 +6,8 @@ namespace kp {
 Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                    std::shared_ptr<vk::Device> device,
                    std::shared_ptr<vk::Queue> computeQueue,
-                   uint32_t queueIndex)
+                   uint32_t queueIndex,
+                   uint32_t totalTimestamps)
 {
     KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue");
 
@@ -17,6 +18,8 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
 
     this->createCommandPool();
     this->createCommandBuffer();
+    if(totalTimestamps>0)
+        this->createTimestampQueryPool(totalTimestamps+1); //+1 for the first one
 }
 
 Sequence::~Sequence()
@@ -44,6 +47,13 @@ Sequence::begin()
     KP_LOG_INFO("Kompute Sequence command now started recording");
     this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
     this->mRecording = true;
+
+    //latch the first timestamp before any commands are submitted
+    if(this->timestampQueryPool)
+        this->mCommandBuffer->writeTimestamp(
+            vk::PipelineStageFlagBits::eAllCommands,
+            *this->timestampQueryPool, 0
+        );
 }
 
 void
@@ -236,6 +246,16 @@ Sequence::destroy()
         this->mOperations.clear();
     }
 
+    if(this->timestampQueryPool){
+        KP_LOG_INFO("Destroying QueryPool");
+        this->mDevice->destroy(
+            *this->timestampQueryPool,
+            (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        
+        this->timestampQueryPool = nullptr;
+        KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool");
+    }
+
     if (this->mDevice) {
         this->mDevice = nullptr;
     }
@@ -261,6 +281,12 @@ Sequence::record(std::shared_ptr<OpBase> op)
 
     this->mOperations.push_back(op);
 
+    if(this->timestampQueryPool)
+      this->mCommandBuffer->writeTimestamp(
+                vk::PipelineStageFlagBits::eAllCommands,
+                *this->timestampQueryPool, this->mOperations.size()
+        );
+    
     return shared_from_this();
 }
 
@@ -308,4 +334,67 @@ Sequence::createCommandBuffer()
     KP_LOG_DEBUG("Kompute Sequence Command Buffer Created");
 }
 
+// Creates the query pool that begin() and record() write timestamps into.
+// Throws std::runtime_error if the sequence is not initialised, the physical
+// device is missing, or the device does not support timestamp queries.
+void
+Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
+{
+    KP_LOG_DEBUG("Kompute Sequence creating query pool");
+    if (!this->isInit()) {
+        throw std::runtime_error("createTimestampQueryPool() called on uninitialized Sequence");
+    }
+    if (!this->mPhysicalDevice) {
+        throw std::runtime_error("Kompute Sequence physical device is null");
+    }
+
+    // Reject devices without timestamp support before creating anything
+    const vk::PhysicalDeviceProperties physicalDeviceProperties =
+      this->mPhysicalDevice->getProperties();
+    if (!physicalDeviceProperties.limits.timestampComputeAndGraphics) {
+        throw std::runtime_error("Device does not support timestamps");
+    }
+
+    vk::QueryPoolCreateInfo queryPoolInfo;
+    queryPoolInfo.setQueryCount(totalTimestamps);
+    queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
+    this->timestampQueryPool = std::make_shared<vk::QueryPool>(
+      this->mDevice->createQueryPool(queryPoolInfo));
+
+    KP_LOG_DEBUG("Query pool for timestamps created");
+}
+
+// Reads back the timestamps written by begin() and record(), waiting until
+// they are available. Throws std::runtime_error if timestamp latching was not
+// enabled for this sequence or the query results could not be read.
+std::vector<std::uint64_t>
+Sequence::getTimestamps()
+{
+    if (!this->timestampQueryPool) {
+        throw std::runtime_error("Timestamp latching not enabled");
+    }
+
+    // begin() writes query 0 and record() writes one more per operation.
+    // NOTE(review): n is not checked against the pool's query count - confirm
+    // callers never record more operations than timestamps were requested.
+    const auto n = this->mOperations.size() + 1;
+    std::vector<std::uint64_t> timestamps(n, 0);
+
+    // This overload returns a vk::Result; check it so a failed readback is not
+    // silently handed back to the caller as zeroed timestamps.
+    const vk::Result result = this->mDevice->getQueryPoolResults(
+      *this->timestampQueryPool,
+      0,
+      static_cast<uint32_t>(n),
+      timestamps.size() * sizeof(std::uint64_t),
+      timestamps.data(),
+      sizeof(std::uint64_t),
+      vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
+    if (result != vk::Result::eSuccess) {
+        throw std::runtime_error("Kompute Sequence failed to read timestamp query results");
+    }
+
+    return timestamps;
+}
+
 }
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp
index 957e45d2e..d9c6ddf3e 100644
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@@ -60,9 +60,11 @@ class Manager
      * if it hasn't been destroyed by its reference count going to zero.
      *
      * @param queueIndex The queue to use from the available queues
+     * @param totalTimestamps The maximum number of timestamps to allocate.
+     * If zero (default), disables latching of timestamps.
      * @returns Shared pointer with initialised sequence
      */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0);
+    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
 
     /**
      * Create a managed tensor that will be destroyed by this manager
diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp
index 10aa80148..d29f6aaf0 100644
--- a/src/include/kompute/Sequence.hpp
+++ b/src/include/kompute/Sequence.hpp
@@ -3,6 +3,7 @@
 #include "kompute/Core.hpp"
 
 #include "kompute/operations/OpBase.hpp"
+#include "kompute/operations/OpAlgoDispatch.hpp"
 
 namespace kp {
 
@@ -20,11 +21,13 @@ class Sequence : public std::enable_shared_from_this<Sequence>
      * @param device Vulkan logical device
      * @param computeQueue Vulkan compute queue
      * @param queueIndex Vulkan compute queue index in device
+     * @param totalTimestamps Maximum number of timestamps to allocate
      */
     Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
              std::shared_ptr<vk::Device> device,
              std::shared_ptr<vk::Queue> computeQueue,
-             uint32_t queueIndex);
+             uint32_t queueIndex,
+             uint32_t totalTimestamps = 0);
     /**
      * Destructor for sequence which is responsible for cleaning all subsequent
      * owned operations.
@@ -200,6 +203,12 @@ class Sequence : public std::enable_shared_from_this<Sequence>
      */
     void clear();
 
+    /**
+     * Return the timestamps that were latched at the beginning and
+     * after each operation during the last eval() call.
+     */
+    std::vector<std::uint64_t> getTimestamps();
+
     /**
      * Begins recording commands for commands to be submitted into the command
      * buffer.
@@ -268,6 +277,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     // -------------- ALWAYS OWNED RESOURCES
     vk::Fence mFence;
     std::vector<std::shared_ptr<OpBase>> mOperations;
+    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
 
     // State
     bool mRecording = false;
@@ -276,6 +286,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     // Create functions
     void createCommandPool();
     void createCommandBuffer();
+    void createTimestampQueryPool(uint32_t totalTimestamps);
 };
 
 } // End namespace kp
diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp
index 482868a88..b8afd1ad6 100644
--- a/test/TestSequence.cpp
+++ b/test/TestSequence.cpp
@@ -100,3 +100,33 @@ TEST(TestSequence, RerecordSequence)
 
     EXPECT_EQ(tensorB->data(), std::vector<float>({2, 8, 18}));
 }
+
+
+TEST(TestSequence, SequenceTimestamps)
+{
+    kp::Manager mgr;
+
+    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
+
+    std::string shader(R"(
+      #version 450
+      layout (local_size_x = 1) in;
+      layout(set = 0, binding = 0) buffer a { float pa[]; };
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          pa[index] = pa[index] + 1;
+      })");
+
+    std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+    // Ask for room for up to 100 timestamps; only five operations are recorded
+    auto seq = mgr.sequence(0, 100);
+    seq->record<kp::OpTensorSyncDevice>({ tensorA })
+        ->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
+        ->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
+        ->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
+        ->record<kp::OpTensorSyncLocal>({ tensorA })
+        ->eval();
+    const std::vector<uint64_t> timestamps = seq->getTimestamps();
+    // 1 timestamp at start + 1 after each operation (unsigned literal matches size())
+    EXPECT_EQ(timestamps.size(), 6u);
+}