Updated docstrings

2020-08-28 16:13:48 +01:00 · 2020-08-28 16:13:48 +01:00 · cb0d7f7cf3
commit cb0d7f7cf3
parent b91c392f5e
7 changed files with 135 additions and 26 deletions
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@ -55,3 +55,13 @@ add_custom_target(gensphinx ALL
    DEPENDS ${DOXYGEN_INDEX_FILE}
    COMMENT "Generating documentation with Sphinx")

+# For completeness we also copy the output doxygen html files
+file(COPY ${DOXYGEN_OUTPUT_DIR}/html/ 
+    DESTINATION ${SPHINX_BUILD}/doxygen/)
+#add_custom_target(includedoxygen ALL
+#    COMMAND ${CMAKE_COMMAND}
+#        -E copy_directory
+#        ${DOXYGEN_OUTPUT_DIR}/html/
+#        ${SPHINX_BUILD}/doxygen/
+#    DEPENDS gensphinx)
+
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@ -74,8 +74,8 @@ std::weak_ptr<Sequence>
 Manager::getOrCreateManagedSequence(std::string sessionName)
 {
    SPDLOG_DEBUG("Kompute Manager creating Sequence object");
-    std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
-        found = this->mManagedSequences.find(sessionName);
+    std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
+      this->mManagedSequences.find(sessionName);

    if (found == this->mManagedSequences.end()) {
        std::shared_ptr<Sequence> sq =
@ -84,10 +84,9 @@ Manager::getOrCreateManagedSequence(std::string sessionName)
                                     this->mComputeQueue,
                                     this->mComputeQueueFamilyIndex);
        sq->init();
-        this->mManagedSequences.insert({sessionName, sq});
+        this->mManagedSequences.insert({ sessionName, sq });
        return sq;
-    }
-    else {
+    } else {
        return found->second;
    }
 }
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@ -31,14 +31,14 @@ class Manager
    std::weak_ptr<Sequence> getOrCreateManagedSequence(std::string sessionName);

    template<typename T, typename... TArgs>
-    void evalOp(std::vector<std::shared_ptr<Tensor>> tensors, std::string sessionName = KP_DEFAULT_SESSION)
+    void evalOp(std::vector<std::shared_ptr<Tensor>> tensors,
+                std::string sessionName = KP_DEFAULT_SESSION)
    {
        SPDLOG_DEBUG("Kompute Manager evalOp triggered");
-        std::weak_ptr<Sequence> sqWeakPtr = 
-            this->getOrCreateManagedSequence(sessionName);
+        std::weak_ptr<Sequence> sqWeakPtr =
+          this->getOrCreateManagedSequence(sessionName);

-        if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) 
-        {
+        if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
            SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
            sq->begin();

@ -55,7 +55,6 @@ class Manager
    }

  private:
-
    std::shared_ptr<vk::Instance> mInstance = nullptr;
    bool mFreeInstance = false;
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
@ -66,7 +65,8 @@ class Manager
    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;

    // Always owned resources
-    std::unordered_map<std::string, std::shared_ptr<Sequence>> mManagedSequences;
+    std::unordered_map<std::string, std::shared_ptr<Sequence>>
+      mManagedSequences;

 #if DEBUG
    vk::DebugReportCallbackEXT mDebugReportCallback;
--- a/src/include/kompute/OpBase.hpp
+++ b/src/include/kompute/OpBase.hpp
@ -23,9 +23,15 @@ class OpBase
    OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); }

    /**
-     *  Default constructor with parameters that provides the bare minimum
+     * Default constructor with parameters that provides the bare minimum
     * requirements for the operations to be able to create and manage their
     * sub-components.
+     *
+     * @param physicalDevice Vulkan physical device used to find device queues
+     * @param device Vulkan logical device for passing to Algorithm
+     * @param commandBuffer Vulkan Command Buffer to record commands into
+     * @param tensors Tensors that are to be used in this operation
+     * @param freeTensors Whether operation manages the memory of the Tensors
     */
    OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
@ -69,22 +75,40 @@ class OpBase
        }
    }

+    /**
+     * The init function is responsible for setting up all the resources and
+     * should be called after the Operation has been created.
+     */
    virtual void init() = 0;

+    /**
+     * The record function is intended to only send a record command or run
+     * commands that are expected to record operations that are to be submitted
+     * as a batch into the GPU.
+     */
    virtual void record() = 0;

+    /**
+     * Post submit is called after the Sequence has submitted the commands to
+     * the GPU for processing, and can be used to perform any tear-down steps
+     * required as the computation iteration finishes.
+     */
    virtual void postSubmit() = 0;

  protected:
-    // Sometimes owned resources
-    std::vector<std::shared_ptr<Tensor>> mTensors;
-    bool mFreeTensors =
-      false; // TODO: Provide granularity to specify which to free
+    // OPTIONALLY OWNED RESOURCES
+    std::vector<std::shared_ptr<Tensor>>
+      mTensors; ///< Tensors referenced by operation that can be managed
+                ///< optionally by operation
+    bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
+                               ///< tensors are freed (if they are managed)

-    // Always external resources
-    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
-    std::shared_ptr<vk::Device> mDevice;
-    std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
+    // NEVER OWNED RESOURCES
+    std::shared_ptr<vk::PhysicalDevice>
+      mPhysicalDevice;                   ///< Vulkan Physical Device
+    std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
+    std::shared_ptr<vk::CommandBuffer>
+      mCommandBuffer; ///< Vulkan Command Buffer
 };

 } // End namespace kp
--- a/src/include/kompute/OpCreateTensor.hpp
+++ b/src/include/kompute/OpCreateTensor.hpp
@ -8,23 +8,55 @@

 namespace kp {

+/**
+ * Operation that creates a tensor and manages the memory of the components
+ * created.
+ */
 class OpCreateTensor : public OpBase
 {
  public:
    OpCreateTensor();

+    /**
+     * Default constructor with parameters that provides the bare minimum
+     * requirements for the operations to be able to create and manage their
+     * sub-components.
+     *
+     * @param physicalDevice Vulkan physical device used to find device queues
+     * @param device Vulkan logical device for passing to Algorithm
+     * @param commandBuffer Vulkan Command Buffer to record commands into
+     * @param tensors Tensors that are to be used in this operation
+     * @param freeTensors Whether operation manages the memory of the Tensors
+     */
    OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device,
                   std::shared_ptr<vk::CommandBuffer> commandBuffer,
                   std::vector<std::shared_ptr<Tensor>>& tensors,
                   bool freeTensors = true);

+    /**
+     * Default destructor which in this case expects the parent class to free
+     * the tensors
+     */
    ~OpCreateTensor();

+    /**
+     * In charge of initialising the primary Tensor as well as the staging
+     * tensor as required. It will only initialise a staging tensor if the
+     * Primary tensor is of type Device.
+     */
    void init() override;

+    /**
+     * Records the copy command into the GPU memory from the staging or host
+     * memory depending on the type of tensor.
+     */
    void record() override;

+    /**
+     * Performs a copy back into the main tensor to ensure that the data
+     * contained is the one that is now being stored in the GPU.
+     */
    void postSubmit() override;

  private:
--- a/src/include/kompute/OpMult.hpp
+++ b/src/include/kompute/OpMult.hpp
@ -14,29 +14,66 @@
 namespace kp {

 /**
-    Base algorithm based operation
-*/
+ * Operation that performs multiplication on two tensors and outputs on third
+ * tensor. The template parameters specify the processing GPU layout number of
+ * iterations for each x, y, z parameter. More specifically, this will be the
+ * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t tZ)"
+ */
 template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
 class OpMult : public OpBase
 {
  public:
    /**
-        Constructor
-    */
+     * Base constructor, should not be used unless explicitly intended.
+     */
    OpMult();

+    /**
+     * Default constructor with parameters that provides the bare minimum
+     * requirements for the operations to be able to create and manage their
+     * sub-components.
+     *
+     * @param physicalDevice Vulkan physical device used to find device queues
+     * @param device Vulkan logical device for passing to Algorithm
+     * @param commandBuffer Vulkan Command Buffer to record commands into
+     * @param tensors Tensors that are to be used in this operation
+     * @param freeTensors Whether operation manages the memory of the Tensors
+     */
    OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           std::shared_ptr<vk::CommandBuffer> commandBuffer,
           std::vector<std::shared_ptr<Tensor>>& tensors,
           bool freeTensors = false);

+    /**
+     * Default destructor, which is in charge of destroying the algorithm
+     * components but does not destroy the underlying tensors
+     */
    ~OpMult();

+    /**
+     * The init function is responsible for ensuring that all of the tensors
+     * provided are aligned with requirements such as LHS, RHS and Output
+     * tensors, and creates the algorithm component which processes the
+     * computation.
+     */
    void init() override;

+    /**
+     * This records the commands that are to be sent to the GPU. This includes
+     * the barriers that ensure the memory has been copied before going in and
+     * out of the shader, as well as the dispatch operation that sends the
+     * shader processing to the gpu. This function also records the GPU memory
+     * copy of the output data for the staging buffer so it can be read by the
+     * host.
+     */
    void record() override;

+    /**
+     * Executes after the recorded commands are submitted, and performs a copy
+     * of the GPU Device memory into the staging buffer so the output data can
+     * be retrieved.
+     */
    void postSubmit() override;

  private:
@ -71,7 +108,6 @@ OpMult<tX, tY, tZ>::OpMult()
    SPDLOG_DEBUG("Kompute OpMult constructor base");
 }

-// TODO: Remove physicalDevice from main initialiser
 template<uint32_t tX, uint32_t tY, uint32_t tZ>
 OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                           std::shared_ptr<vk::Device> device,
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@ -6,6 +6,14 @@

 namespace kp {

+/**
+ * Structured data used in GPU operations.
+ *
+ * Tensors are the base building block in Kompute to perform operations across
+ * GPUs. Each tensor would have a respective Vulkan memory and buffer, which
+ * would be used to store their respective data. The tensors can be used for GPU
+ * data storage or transfer.
+ */
 class Tensor
 {
  public: