Updating python

Signed-off-by: Alejandro Saucedo <axsauze@gmail.com>
2021-09-12 14:07:20 +01:00 · 2021-09-12 14:07:20 +01:00 · 3d320ff687
commit 3d320ff687
parent 1972f2c8f8
2 changed files with 1 additions and 520 deletions
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -1793,523 +1793,3 @@ class OpMult : public OpAlgoDispatch
 };

 } // End namespace kp
-
-// SPDX-License-Identifier: Apache-2.0
-
-namespace kp {
-
-/**
- *  Container of operations that can be sent to GPU as batch
- */
-class Sequence : public std::enable_shared_from_this<Sequence>
-{
-  public:
-    /**
-     * Main constructor for sequence which requires core vulkan components to
-     * generate all dependent resources.
-     *
-     * @param physicalDevice Vulkan physical device
-     * @param device Vulkan logical device
-     * @param computeQueue Vulkan compute queue
-     * @param queueIndex Vulkan compute queue index in device
-     * @param totalTimestamps Maximum number of timestamps to allocate
-     */
-    Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-             std::shared_ptr<vk::Device> device,
-             std::shared_ptr<vk::Queue> computeQueue,
-             uint32_t queueIndex,
-             uint32_t totalTimestamps = 0);
-    /**
-     * Destructor for sequence which is responsible for cleaning all subsequent
-     * owned operations.
-     */
-    ~Sequence();
-
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param op Object derived from kp::BaseOp that will be recoreded by the
-     * sequence which will be used when the operation is evaluated.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
-
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> record(
-      std::vector<std::shared_ptr<Tensor>> tensors,
-      TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->record(op);
-    }
-    /**
-     * Record function for operation to be added to the GPU queue in batch. This
-     * template requires classes to be derived from the OpBase class. This
-     * function also requires the Sequence to be recording, otherwise it will
-     * not be able to add the operation.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
-                                     TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->record(op);
-    }
-
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job synchronously (with a barrier).
-     *
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> eval();
-
-    /**
-     * Resets all the recorded and stored operations, records the operation
-     * provided and submits into the gpu as a submit job synchronously (with a
-     * barrier).
-     *
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
-
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
-                                   TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->eval(op);
-    }
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
-                                   TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->eval(op);
-    }
-
-    /**
-     * Eval Async sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job without a barrier. EvalAwait()
-     * must ALWAYS be called after to ensure the sequence is terminated
-     * correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync();
-    /**
-     * Clears currnet operations to record provided one in the vector of
-     * operations into the gpu as a submit job without a barrier. EvalAwait()
-     * must ALWAYS be called after to ensure the sequence is terminated
-     * correctly.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param tensors Vector of tensors to use for the operation
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> evalAsync(
-      std::vector<std::shared_ptr<Tensor>> tensors,
-      TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
-        return this->evalAsync(op);
-    }
-    /**
-     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
-     *
-     * @param algorithm Algorithm to use for the record often used for OpAlgo
-     * operations
-     * @param TArgs Template parameters that are used to initialise operation
-     * which allows for extensible configurations on initialisation.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    template<typename T, typename... TArgs>
-    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
-                                        TArgs&&... params)
-    {
-        std::shared_ptr<T> op{ new T(algorithm,
-                                     std::forward<TArgs>(params)...) };
-        return this->evalAsync(op);
-    }
-
-    /**
-     * Eval Await waits for the fence to finish processing and then once it
-     * finishes, it runs the postEval of all operations.
-     *
-     * @param waitFor Number of milliseconds to wait before timing out.
-     * @return shared_ptr<Sequence> of the Sequence class itself
-     */
-    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
-
-    /**
-     * Clear function clears all operations currently recorded and starts
-     * recording again.
-     */
-    void clear();
-
-    /**
-     * Return the timestamps that were latched at the beginning and
-     * after each operation during the last eval() call.
-     */
-    std::vector<std::uint64_t> getTimestamps();
-
-    /**
-     * Begins recording commands for commands to be submitted into the command
-     * buffer.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    void begin();
-
-    /**
-     * Ends the recording and stops recording commands when the record command
-     * is sent.
-     *
-     * @return Boolean stating whether execution was successful.
-     */
-    void end();
-
-    /**
-     * Returns true if the sequence is currently in recording activated.
-     *
-     * @return Boolean stating if recording ongoing.
-     */
-    bool isRecording();
-
-    /**
-     * Returns true if the sequence has been initialised, and it's based on the
-     * GPU resources being refrenced.
-     *
-     * @return Boolean stating if is initialized
-     */
-    bool isInit();
-
-    /**
-     * Clears command buffer and triggers re-record of all the current
-     * operations saved, which is useful if the underlying kp::Tensors or
-     * kp::Algorithms are modified and need to be re-recorded.
-     */
-    void rerecord();
-
-    /**
-     * Returns true if the sequence is currently running - mostly used for async
-     * workloads.
-     *
-     * @return Boolean stating if currently running.
-     */
-    bool isRunning();
-
-    /**
-     * Destroys and frees the GPU resources which include the buffer and memory
-     * and sets the sequence as init=False.
-     */
-    void destroy();
-
-  private:
-    // -------------- NEVER OWNED RESOURCES
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    std::shared_ptr<vk::Device> mDevice = nullptr;
-    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
-    uint32_t mQueueIndex = -1;
-
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
-    bool mFreeCommandPool = false;
-    std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
-    bool mFreeCommandBuffer = false;
-
-    // -------------- ALWAYS OWNED RESOURCES
-    vk::Fence mFence;
-    std::vector<std::shared_ptr<OpBase>> mOperations;
-    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
-
-    // State
-    bool mRecording = false;
-    bool mIsRunning = false;
-
-    // Create functions
-    void createCommandPool();
-    void createCommandBuffer();
-    void createTimestampQueryPool(uint32_t totalTimestamps);
-};
-
-} // End namespace kp
-
-// SPDX-License-Identifier: Apache-2.0
-
-#include <set>
-#include <unordered_map>
-
-#define KP_DEFAULT_SESSION "DEFAULT"
-
-namespace kp {
-
-/**
-    Base orchestrator which creates and manages device and child components
-*/
-class Manager
-{
-  public:
-    /**
-        Base constructor and default used which creates the base resources
-       including choosing the device 0 by default.
-    */
-    Manager();
-
-    /**
-     * Similar to base constructor but allows for further configuration to use
-     * when creating the Vulkan resources.
-     *
-     * @param physicalDeviceIndex The index of the physical device to use
-     * @param familyQueueIndices (Optional) List of queue indices to add for
-     * explicit allocation
-     * @param desiredExtensions The desired extensions to load from
-     * physicalDevice
-     */
-    Manager(uint32_t physicalDeviceIndex,
-            const std::vector<uint32_t>& familyQueueIndices = {},
-            const std::vector<std::string>& desiredExtensions = {});
-
-    /**
-     * Manager constructor which allows your own vulkan application to integrate
-     * with the kompute use.
-     *
-     * @param instance Vulkan compute instance to base this application
-     * @param physicalDevice Vulkan physical device to use for application
-     * @param device Vulkan logical device to use for all base resources
-     * @param physicalDeviceIndex Index for vulkan physical device used
-     */
-    Manager(std::shared_ptr<vk::Instance> instance,
-            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
-            std::shared_ptr<vk::Device> device);
-
-    /**
-     * Manager destructor which would ensure all owned resources are destroyed
-     * unless explicitly stated that resources should not be destroyed or freed.
-     */
-    ~Manager();
-
-    /**
-     * Create a managed sequence that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param queueIndex The queue to use from the available queues
-     * @param nrOfTimestamps The maximum number of timestamps to allocate.
-     * If zero (default), disables latching of timestamps.
-     * @returns Shared pointer with initialised sequence
-     */
-    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
-                                       uint32_t totalTimestamps = 0);
-
-    /**
-     * Create a managed tensor that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param data The data to initialize the tensor with
-     * @param tensorType The type of tensor to initialize
-     * @returns Shared pointer with initialised tensor
-     */
-    template<typename T>
-    std::shared_ptr<TensorT<T>> tensorT(
-      const std::vector<T>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
-
-        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
-          this->mPhysicalDevice, this->mDevice, data, tensorType) };
-
-        if (this->mManageResources) {
-            this->mManagedTensors.push_back(tensor);
-        }
-
-        return tensor;
-    }
-
-    std::shared_ptr<TensorT<float>> tensor(
-      const std::vector<float>& data,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        return this->tensorT<float>(data, tensorType);
-    }
-
-    std::shared_ptr<Tensor> tensor(
-      void* data,
-      uint32_t elementTotalCount,
-      uint32_t elementMemorySize,
-      const Tensor::TensorDataTypes& dataType,
-      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
-    {
-        std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
-                                                       this->mDevice,
-                                                       data,
-                                                       elementTotalCount,
-                                                       elementMemorySize,
-                                                       dataType,
-                                                       tensorType) };
-
-        if (this->mManageResources) {
-            this->mManagedTensors.push_back(tensor);
-        }
-
-        return tensor;
-    }
-
-    std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
-      const std::vector<uint32_t>& spirv = {},
-      const Workgroup& workgroup = {},
-      const std::vector<float>& specializationConstants = {},
-      const std::vector<float>& pushConstants = {})
-    {
-        return this->algorithm<>(tensors, spirv, workgroup, specializationConstants, pushConstants);
-    }
-
-    /**
-     * Create a managed algorithm that will be destroyed by this manager
-     * if it hasn't been destroyed by its reference count going to zero.
-     *
-     * @param tensors (optional) The tensors to initialise the algorithm with
-     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
-     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
-     * defaults to (tensor[0].size(), 1, 1)
-     * @param specializationConstants (optional) kp::Constant to use for
-     * specialization constants, and defaults to an empty constant
-     * @param pushConstants (optional) kp::Constant to use for push constants,
-     * and defaults to an empty constant
-     * @returns Shared pointer with initialised algorithm
-     */
-    template<typename S = float, typename P = float>
-    std::shared_ptr<Algorithm> algorithm(
-      const std::vector<std::shared_ptr<Tensor>>& tensors,
-      const std::vector<uint32_t>& spirv,
-      const Workgroup& workgroup,
-      const std::vector<S>& specializationConstants,
-      const std::vector<P>& pushConstants)
-    {
-
-        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
-
-        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
-          this->mDevice,
-          tensors,
-          spirv,
-          workgroup,
-          specializationConstants,
-          pushConstants) };
-
-        if (this->mManageResources) {
-            this->mManagedAlgorithms.push_back(algorithm);
-        }
-
-        return algorithm;
-    }
-
-    /**
-     * Destroy the GPU resources and all managed resources by manager.
-     **/
-    void destroy();
-    /**
-     * Run a pseudo-garbage collection to release all the managed resources
-     * that have been already freed due to these reaching to zero ref count.
-     **/
-    void clear();
-
-    /**
-     * Information about the current device.
-     *
-     * @return vk::PhysicalDeviceProperties containing information about the device
-     **/
-    vk::PhysicalDeviceProperties getDeviceProperties() const;
-
-    /**
-     * List the devices available in the current vulkan instance.
-     *
-     * @return vector of physical devices containing their respective properties
-     **/
-    std::vector<vk::PhysicalDevice> listDevices() const;
-
-  private:
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::Instance> mInstance = nullptr;
-    bool mFreeInstance = false;
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
-    std::shared_ptr<vk::Device> mDevice = nullptr;
-    bool mFreeDevice = false;
-
-    // -------------- ALWAYS OWNED RESOURCES
-    std::vector<std::weak_ptr<Tensor>> mManagedTensors;
-    std::vector<std::weak_ptr<Sequence>> mManagedSequences;
-    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
-
-    std::vector<uint32_t> mComputeQueueFamilyIndices;
-    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
-
-    bool mManageResources = false;
-
-#if DEBUG
-#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
-    vk::DebugReportCallbackEXT mDebugReportCallback;
-    vk::DispatchLoaderDynamic mDebugDispatcher;
-#endif
-#endif
-
-    // Create functions
-    void createInstance();
-    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
-                      uint32_t hysicalDeviceIndex = 0,
-                      const std::vector<std::string>& desiredExtensions = {});
-};
-
-} // End namespace kp
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@ -10,6 +10,7 @@ OpAlgoDispatch::~OpAlgoDispatch()
    KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");

    if (this->mPushConstantsData) {
+        KP_LOG_DEBUG("Kompute freeing push constants data");
        free(this->mPushConstantsData);
    }
 }