Merge branch 'master' into timestamps

2021-03-06 13:18:21 +01:00 · 2021-03-06 13:18:21 +01:00 · eb47d52047
commit eb47d52047
parent 6f5a8f8968 515c3b6cd4
26 changed files with 1189 additions and 816 deletions
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@ -14,31 +14,46 @@ class Algorithm
 {
  public:
    /**
-     *  Default constructor for Algorithm
+     *  Main constructor for algorithm with configuration parameters to create
+     *  the underlying resources.
     *
     *  @param device The Vulkan device to use for creating resources
-     *  @param commandBuffer The vulkan command buffer to bind the pipeline and
-     * shaders
+     *  @param tensors (optional) The tensors to use to create the descriptor resources
+     *  @param spirv (optional) The spirv code to use to create the algorithm
+     *  @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
+     *  kp::Workgroup(tensor[0].size(), 1, 1) if not set.
+     *  @param specializationConstants (optional) The kp::Constants to use to initialize
+     *  the specialization constants which cannot be changed once set.
+     *  @param pushConstants (optional) The kp::Constants to use when initializing the
+     *  pipeline, which set the size of the push constants - these can be modified but
+     *  all new values must have the same vector size as this initial value.
     */
    Algorithm(std::shared_ptr<vk::Device> device,
              const std::vector<std::shared_ptr<Tensor>>& tensors = {},
              const std::vector<uint32_t>& spirv = {},
              const Workgroup& workgroup = {},
-              const Constants& specializationConstants = {});
+              const Constants& specializationConstants = {},
+              const Constants& pushConstants = {});

    /**
-     * Initialiser for the shader data provided to the algorithm as well as
-     * tensor parameters that will be used in shader.
+     *  Rebuild function to reconstruct algorithm with configuration parameters to create
+     *  the underlying resources.
     *
-     * @param shaderFileData The bytes in spir-v format of the shader
-     * @tensorParams The Tensors to be used in the Algorithm / shader for
-     * @specalizationInstalces The specialization parameters to pass to the
-     * function processing
+     *  @param tensors The tensors to use to create the descriptor resources
+     *  @param spirv The spirv code to use to create the algorithm
+     *  @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
+     *  kp::Workgroup(tensor[0].size(), 1, 1) if not set.
+     *  @param specializationConstants (optional) The kp::Constants to use to initialize
+     *  the specialization constants which cannot be changed once set.
+     *  @param pushConstants (optional) The kp::Constants to use when initializing the
+     *  pipeline, which set the size of the push constants - these can be modified but
+     *  all new values must have the same vector size as this initial value.
     */
    void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
                 const std::vector<uint32_t>& spirv,
                 const Workgroup& workgroup = {},
-                 const Constants& specializationConstants = {});
+                 const Constants& specializationConstants = {},
+                 const Constants& pushConstants = {});

    /**
     * Destructor for Algorithm which is responsible for freeing and desroying
@ -50,23 +65,77 @@ class Algorithm
     * Records the dispatch function with the provided template parameters or
     * alternatively using the size of the tensor by default.
     *
-     * @param x Layout X dispatch value
-     * @param y Layout Y dispatch value
-     * @param z Layout Z dispatch value
+     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordDispatch(const vk::CommandBuffer& commandBuffer);

-    void bindCore(const vk::CommandBuffer& commandBuffer);
+    /**
+     * Records command that binds the "core" algorithm components which consist of
+     * binding the pipeline and binding the descriptorsets.
+     *
+     * @param commandBuffer Command buffer to record the algorithm resources to
+     */
+    void recordBindCore(const vk::CommandBuffer& commandBuffer);

-    void bindPush(const vk::CommandBuffer& commandBuffer,
-                  const Constants& pushConstants);
+    /**
+     * Records command that binds the push constants to the command buffer provided
+     * - it is required that the pushConstants provided are of the same size as the
+     *   ones provided during initialization.
+     *
+     * @param commandBuffer Command buffer to record the algorithm resources to
+     */
+    void recordBindPush(const vk::CommandBuffer& commandBuffer);

+    /**
+     * function that checks all the gpu resource components to verify if these have
+     * been created and returns true if all are valid.
+     *
+     * @returns returns true if the algorithm is currently initialized.
+     */
    bool isInit();

+    /**
+     * Sets the work group to use in the recordDispatch
+     *
+     * @param workgroup The kp::Workgroup value to use to update the algorithm. It
+     * must have a value greater than 1 on the x value (index 1) otherwise it will
+     * be initialized on the size of the first tensor (ie. this->mTensor[0]->size())
+     */
    void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
+    /**
+     * Sets the push constants to the new value provided to use in the next recordBindPush()
+     *
+     * @param pushConstants The kp::Constants to use to set the push constants to use in the next
+     * recordBindPush(...) calls. The constants provided must be of the same size as the
+     * ones created during initialization.
+     */
+    void setPush(const Constants& pushConstants);

+    /**
+     * Gets the current workgroup from the algorithm.
+     *
+     * @returns The kp::Workgroup currently set for the algorithm, which is the
+     * one used in recordDispatch(...) and can be updated through
+     * setWorkgroup(...).
+     */
    const Workgroup& getWorkgroup();
+    /**
+     * Gets the specialization constants of the current algorithm.
+     *
+     * @returns The kp::Constants currently set for specialization constants
+     */
    const Constants& getSpecializationConstants();
+    /**
+     * Gets the push constants of the current algorithm.
+     *
+     * @returns The kp::Constants currently set for push constants
+     */
+    const Constants& getPush();
+    /**
+     * Gets the current tensors that are used in the algorithm.
+     *
+     * @returns The list of tensors used in the algorithm.
+     */
    const std::vector<std::shared_ptr<Tensor>>& getTensors();

    void destroy();
@ -95,10 +164,9 @@ class Algorithm
    // -------------- ALWAYS OWNED RESOURCES
    std::vector<uint32_t> mSpirv;
    Constants mSpecializationConstants;
+    Constants mPushConstants;
    Workgroup mWorkgroup;

-    bool mIsInit;
-
    // Create util functions
    void createShaderModule();
    void createPipeline();
--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@ -24,18 +24,17 @@ class Manager
    Manager();

    /**
-     * Similar to base constructor but allows the user to provide the device
-     * they would like to create the resources on.
+     * Similar to base constructor but allows for further configuration to use when
+     * creating the Vulkan resources.
     *
     * @param physicalDeviceIndex The index of the physical device to use
-     * @param manageResources (Optional) Whether to manage the memory of the
-     * resources created and destroy when the manager is destroyed.
     * @param familyQueueIndices (Optional) List of queue indices to add for
     * explicit allocation
-     * @param totalQueues The total number of compute queues to create.
+     * @param desiredExtensions The desired extensions to load from physicalDevice
     */
    Manager(uint32_t physicalDeviceIndex,
-            const std::vector<uint32_t>& familyQueueIndices = {});
+            const std::vector<uint32_t>& familyQueueIndices = {},
+            const std::vector<std::string>& desiredExtensions = {});

    /**
     * Manager constructor which allows your own vulkan application to integrate
@ -57,41 +56,57 @@ class Manager
    ~Manager();

    /**
-     * Get or create a managed Sequence that will be contained by this manager.
-     * If the named sequence does not currently exist, it would be created and
-     * initialised.
+     * Create a managed sequence that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
     *
-     * @param sequenceName The name for the named sequence to be retrieved or
-     * created
     * @param queueIndex The queue to use from the available queues
     * @param nrOfTimestamps The maximum number of timestamps to allocate.
     * If zero (default), disables latching of timestamps.
-     * @return Shared pointer to the manager owned sequence resource
+     * @returns Shared pointer with initialised sequence
     */
    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);

    /**
-     * Function that simplifies the common workflow of tensor creation and
-     * initialization. It will take the constructor parameters for a Tensor
-     * and will will us it to create a new Tensor and then create it. The
-     * tensor memory will then be managed and owned by the manager.
+     * Create a managed tensor that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param data The data to initialize the tensor with
     * @param tensorType The type of tensor to initialize
-     * @param syncDataToGPU Whether to sync the data to GPU memory
-     * @returns Initialized Tensor with memory Syncd to GPU device
+     * @returns Shared pointer with initialised tensor
     */
    std::shared_ptr<Tensor> tensor(
      const std::vector<float>& data,
      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);

+    /**
+     * Create a managed algorithm that will be destroyed by this manager
+     * if it hasn't been destroyed by its reference count going to zero.
+     *
+     * @param tensors (optional) The tensors to initialise the algorithm with
+     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
+     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
+     * defaults to (tensor[0].size(), 1, 1)
+     * @param specializationConstants (optional) kp::Constant to use for
+     * specialization constants, and defaults to an empty constant
+     * @param pushConstants (optional) kp::Constant to use for push constants,
+     * and defaults to an empty constant
+     * @returns Shared pointer with initialised algorithm
+     */
    std::shared_ptr<Algorithm> algorithm(
      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
      const std::vector<uint32_t>& spirv = {},
      const Workgroup& workgroup = {},
-      const Constants& specializationConstants = {});
+      const Constants& specializationConstants = {},
+      const Constants& pushConstants = {});

+    /**
+     * Destroy the GPU resources and all managed resources by manager.
+     **/
    void destroy();
+    /**
+     * Run a pseudo-garbage collection to release all the managed resources
+     * that have been already freed due to these reaching to zero ref count.
+     **/
    void clear();

  private:
@ -122,7 +137,8 @@ class Manager
    // Create functions
    void createInstance();
    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
-                      uint32_t hysicalDeviceIndex = 0);
+                      uint32_t hysicalDeviceIndex = 0,
+                      const std::vector<std::string>& desiredExtensions = {});
 };

 } // End namespace kp
--- a/src/include/kompute/Parameter.hpp
+++ b/src/include/kompute/Parameter.hpp
@ -1,47 +0,0 @@
-#pragma once
-
-#include "kompute/Core.hpp"
-
-#include "kompute/Tensor.hpp"
-
-namespace kp {
-
-class Algorithm
-{
-  public:
-    Algorithm();
-
-    Algorithm(std::shared_ptr<vk::Device> device);
-
-    void init(std::string shaderFilePath,
-              std::vector<std::shared_ptr<Tensor>> tensorParams);
-
-    ~Algorithm();
-
-  private:
-    // -------------- NEVER OWNED RESOURCES
-    std::shared_ptr<vk::Device> mDevice;
-
-    // -------------- OPTIONALLY OWNED RESOURCES
-    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
-    bool mFreeDescriptorSetLayout = false;
-    std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
-    bool mFreeDescriptorPool = false;
-    std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
-    bool mFreeDescriptorSet = false;
-    std::shared_ptr<vk::ShaderModule> mShaderModule;
-    bool mFreeShaderModule = false;
-    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
-    bool mFreePipelineLayout = false;
-    std::shared_ptr<vk::PipelineCache> mPipelineCache;
-    bool mFreePipelineCache = false;
-    std::shared_ptr<vk::Pipeline> mPipeline;
-    bool mFreePipeline = false;
-
-    // Create util functions
-    void createParameters();
-    void createShaderModule(std::string shaderFilePath);
-    void createPipeline();
-};
-
-} // End namespace kp
--- a/src/include/kompute/Sequence.hpp
+++ b/src/include/kompute/Sequence.hpp
@ -35,6 +35,14 @@ class Sequence : public std::enable_shared_from_this<Sequence>
    ~Sequence();

    /**
+     * Record function for operation to be added to the GPU queue in batch. This
+     * template requires classes to be derived from the OpBase class. This
+     * function also requires the Sequence to be recording, otherwise it will
+     * not be able to add the operation.
+     *
+     * @param op Object derived from kp::OpBase that will be recorded by the sequence
+     * which will be used when the operation is evaluated.
+     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);

@ -47,6 +55,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     * @param tensors Vector of tensors to use for the operation
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> record(
@ -55,6 +64,18 @@ class Sequence : public std::enable_shared_from_this<Sequence>
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->record(op);
    }
+    /**
+     * Record function for operation to be added to the GPU queue in batch. This
+     * template requires classes to be derived from the OpBase class. This
+     * function also requires the Sequence to be recording, otherwise it will
+     * not be able to add the operation.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
                                     TArgs&&... params)
@ -66,21 +87,29 @@ class Sequence : public std::enable_shared_from_this<Sequence>

    /**
     * Eval sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier.
+     * operations into the gpu as a submit job synchronously (with a barrier).
     *
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> eval();

+    /**
+     * Resets all the recorded and stored operations, records the operation 
+     * provided and submits into the gpu as a submit job synchronously (with a barrier).
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);

    /**
     * Eval sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job with a barrier.
     *
+     * @param tensors Vector of tensors to use for the operation
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
-    // TODO: Aim to have only a single function with tensors/algorithm
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
                                   TArgs&&... params)
@ -88,6 +117,16 @@ class Sequence : public std::enable_shared_from_this<Sequence>
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->eval(op);
    }
+    /**
+     * Eval sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job with a barrier.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
                                   TArgs&&... params)
@ -99,18 +138,27 @@ class Sequence : public std::enable_shared_from_this<Sequence>

    /**
     * Eval Async sends all the recorded and stored operations in the vector of
-     * operations into the gpu as a submit job with a barrier. EvalAwait() must
-     * be called after to ensure the sequence is terminated correctly.
+     * operations into the gpu as a submit job without a barrier. EvalAwait() must
+     * ALWAYS be called after to ensure the sequence is terminated correctly.
     *
     * @return Boolean stating whether execution was successful.
     */
    std::shared_ptr<Sequence> evalAsync();
+    /**
+     * Clears current operations to record provided one in the vector of
+     * operations into the gpu as a submit job without a barrier. EvalAwait() must
+     * ALWAYS be called after to ensure the sequence is terminated correctly.
+     *
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
-
    /**
     * Eval sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job with a barrier.
     *
+     * @param tensors Vector of tensors to use for the operation
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
@ -121,6 +169,16 @@ class Sequence : public std::enable_shared_from_this<Sequence>
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->evalAsync(op);
    }
+    /**
+     * Eval Async sends all the recorded and stored operations in the vector of
+     * operations into the gpu as a submit job without a barrier.
+     *
+     * @param algorithm Algorithm to use for the record often used for OpAlgo
+     * operations
+     * @param TArgs Template parameters that are used to initialise operation
+     * which allows for extensible configurations on initialisation.
+     * @return shared_ptr<Sequence> of the Sequence class itself
+     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
                                        TArgs&&... params)
@ -135,7 +193,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     * finishes, it runs the postEval of all operations.
     *
     * @param waitFor Number of milliseconds to wait before timing out.
-     * @return Boolean stating whether execution was successful.
+     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);

@ -174,8 +232,19 @@ class Sequence : public std::enable_shared_from_this<Sequence>
     */
    bool isRecording();

+    /**
+     * Returns true if the sequence has been initialised, and it's based on the
+     * GPU resources being referenced.
+     *
+     * @return Boolean stating if is initialized
+     */
    bool isInit();

+    /**
+     * Clears command buffer and triggers re-record of all the current operations 
+     * saved, which is useful if the underlying kp::Tensors or kp::Algorithms
+     * are modified and need to be re-recorded.
+     */
    void rerecord();

    /**
--- a/src/include/kompute/Shader.hpp
+++ b/src/include/kompute/Shader.hpp
@ -12,124 +12,18 @@

 namespace kp {

-// The default resource limit for the GLSL compiler, can be overwritten
-// Has been adobted by:
-// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
-const TBuiltInResource defaultResource = {
-    /* .MaxLights = */ 0,
-    /* .MaxClipPlanes = */ 0,
-    /* .MaxTextureUnits = */ 0,
-    /* .MaxTextureCoords = */ 0,
-    /* .MaxVertexAttribs = */ 64,
-    /* .MaxVertexUniformComponents = */ 4096,
-    /* .MaxVaryingFloats = */ 64,
-    /* .MaxVertexTextureImageUnits = */ 0,
-    /* .MaxCombinedTextureImageUnits = */ 0,
-    /* .MaxTextureImageUnits = */ 0,
-    /* .MaxFragmentUniformComponents = */ 0,
-    /* .MaxDrawBuffers = */ 0,
-    /* .MaxVertexUniformVectors = */ 128,
-    /* .MaxVaryingVectors = */ 8,
-    /* .MaxFragmentUniformVectors = */ 0,
-    /* .MaxVertexOutputVectors = */ 16,
-    /* .MaxFragmentInputVectors = */ 0,
-    /* .MinProgramTexelOffset = */ -8,
-    /* .MaxProgramTexelOffset = */ 7,
-    /* .MaxClipDistances = */ 8,
-    /* .MaxComputeWorkGroupCountX = */ 65535,
-    /* .MaxComputeWorkGroupCountY = */ 65535,
-    /* .MaxComputeWorkGroupCountZ = */ 65535,
-    /* .MaxComputeWorkGroupSizeX = */ 1024,
-    /* .MaxComputeWorkGroupSizeY = */ 1024,
-    /* .MaxComputeWorkGroupSizeZ = */ 64,
-    /* .MaxComputeUniformComponents = */ 1024,
-    /* .MaxComputeTextureImageUnits = */ 16,
-    /* .MaxComputeImageUniforms = */ 8,
-    /* .MaxComputeAtomicCounters = */ 8,
-    /* .MaxComputeAtomicCounterBuffers = */ 1,
-    /* .MaxVaryingComponents = */ 60,
-    /* .MaxVertexOutputComponents = */ 64,
-    /* .MaxGeometryInputComponents = */ 64,
-    /* .MaxGeometryOutputComponents = */ 128,
-    /* .MaxFragmentInputComponents = */ 0,
-    /* .MaxImageUnits = */ 0,
-    /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
-    /* .MaxCombinedShaderOutputResources = */ 8,
-    /* .MaxImageSamples = */ 0,
-    /* .MaxVertexImageUniforms = */ 0,
-    /* .MaxTessControlImageUniforms = */ 0,
-    /* .MaxTessEvaluationImageUniforms = */ 0,
-    /* .MaxGeometryImageUniforms = */ 0,
-    /* .MaxFragmentImageUniforms = */ 0,
-    /* .MaxCombinedImageUniforms = */ 0,
-    /* .MaxGeometryTextureImageUnits = */ 0,
-    /* .MaxGeometryOutputVertices = */ 256,
-    /* .MaxGeometryTotalOutputComponents = */ 1024,
-    /* .MaxGeometryUniformComponents = */ 1024,
-    /* .MaxGeometryVaryingComponents = */ 64,
-    /* .MaxTessControlInputComponents = */ 128,
-    /* .MaxTessControlOutputComponents = */ 128,
-    /* .MaxTessControlTextureImageUnits = */ 0,
-    /* .MaxTessControlUniformComponents = */ 1024,
-    /* .MaxTessControlTotalOutputComponents = */ 4096,
-    /* .MaxTessEvaluationInputComponents = */ 128,
-    /* .MaxTessEvaluationOutputComponents = */ 128,
-    /* .MaxTessEvaluationTextureImageUnits = */ 16,
-    /* .MaxTessEvaluationUniformComponents = */ 1024,
-    /* .MaxTessPatchComponents = */ 120,
-    /* .MaxPatchVertices = */ 32,
-    /* .MaxTessGenLevel = */ 64,
-    /* .MaxViewports = */ 16,
-    /* .MaxVertexAtomicCounters = */ 0,
-    /* .MaxTessControlAtomicCounters = */ 0,
-    /* .MaxTessEvaluationAtomicCounters = */ 0,
-    /* .MaxGeometryAtomicCounters = */ 0,
-    /* .MaxFragmentAtomicCounters = */ 0,
-    /* .MaxCombinedAtomicCounters = */ 8,
-    /* .MaxAtomicCounterBindings = */ 1,
-    /* .MaxVertexAtomicCounterBuffers = */ 0,
-    /* .MaxTessControlAtomicCounterBuffers = */ 0,
-    /* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
-    /* .MaxGeometryAtomicCounterBuffers = */ 0,
-    /* .MaxFragmentAtomicCounterBuffers = */ 0,
-    /* .MaxCombinedAtomicCounterBuffers = */ 1,
-    /* .MaxAtomicCounterBufferSize = */ 16384,
-    /* .MaxTransformFeedbackBuffers = */ 4,
-    /* .MaxTransformFeedbackInterleavedComponents = */ 64,
-    /* .MaxCullDistances = */ 8,
-    /* .MaxCombinedClipAndCullDistances = */ 8,
-    /* .MaxSamples = */ 4,
-    /* .maxMeshOutputVerticesNV = */ 256,
-    /* .maxMeshOutputPrimitivesNV = */ 512,
-    /* .maxMeshWorkGroupSizeX_NV = */ 32,
-    /* .maxMeshWorkGroupSizeY_NV = */ 1,
-    /* .maxMeshWorkGroupSizeZ_NV = */ 1,
-    /* .maxTaskWorkGroupSizeX_NV = */ 32,
-    /* .maxTaskWorkGroupSizeY_NV = */ 1,
-    /* .maxTaskWorkGroupSizeZ_NV = */ 1,
-    /* .maxMeshViewCountNV = */ 4,
-    /* .maxDualSourceDrawBuffersEXT = */ 1,
-
-    /* .limits = */
-    {
-      /* .nonInductiveForLoops = */ 1,
-      /* .whileLoops = */ 1,
-      /* .doWhileLoops = */ 1,
-      /* .generalUniformIndexing = */ 1,
-      /* .generalAttributeMatrixVectorIndexing = */ 1,
-      /* .generalVaryingIndexing = */ 1,
-      /* .generalSamplerIndexing = */ 1,
-      /* .generalVariableIndexing = */ 1,
-      /* .generalConstantMatrixVectorIndexing = */ 1,
-    }
-};
-
 /**
    Shader utily class with functions to compile and process glsl files.
 */
 class Shader
 {
  public:
+
+    // The default resource limit for the GLSL compiler, can be overwritten
+    // Has been adopted by:
+    // https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
+    const static TBuiltInResource defaultResource;
+
    /**
     * Compile multiple sources with optional filenames. Currently this function
     * uses the glslang C++ interface which is not thread safe so this funciton
@ -150,7 +44,7 @@ class Shader
      const std::vector<std::string>& files = {},
      const std::string& entryPoint = "main",
      std::vector<std::pair<std::string, std::string>> definitions = {},
-      const TBuiltInResource& resources = defaultResource);
+      const TBuiltInResource& resources = Shader::defaultResource);

    /**
     * Compile a single glslang source from string value. Currently this
@ -170,7 +64,7 @@ class Shader
      const std::string& source,
      const std::string& entryPoint = "main",
      std::vector<std::pair<std::string, std::string>> definitions = {},
-      const TBuiltInResource& resources = defaultResource);
+      const TBuiltInResource& resources = Shader::defaultResource);
 };

 }
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@ -29,12 +29,14 @@ class Tensor
    };

    /**
-     *  Default constructor with data provided which would be used to create the
+     *  Constructor with data provided which would be used to create the
     * respective vulkan buffer and memory.
     *
+     *  @param physicalDevice The physical device to use to fetch properties
+     *  @param device The device to use to create the buffer and memory from
     *  @param data Non-zero-sized vector of data that will be used by the
     * tensor
-     *  @param tensorType Type for the tensor which is of type TensorTypes
+     *  @param tensorTypes Type for the tensor which is of type TensorTypes
     */
    Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
@ -48,10 +50,11 @@ class Tensor
    ~Tensor();

    /**
-     * Initialiser which calls the initialisation for all the respective tensors
-     * as well as creates the respective staging tensors. The staging tensors
-     * would only be created for the tensors of type TensorType::eDevice as
-     * otherwise there is no need to copy from host memory.
+     * Function to trigger reinitialisation of the tensor buffer and memory with
+     * new data as well as new potential device type.
+     *
+     * @param data Vector of data to use to initialise vector from
+     * @param tensorType The type to use for the tensor
     */
    void rebuild(const std::vector<float>& data,
                 TensorTypes tensorType = TensorTypes::eDevice);
@ -61,6 +64,11 @@ class Tensor
     */
    void destroy();

+    /**
+     * Check whether tensor is initialized based on the created gpu resources.
+     *
+     * @returns Boolean stating whether tensor is initialized
+     */
    bool isInit();

    /**
--- a/src/include/kompute/operations/OpAlgoDispatch.hpp
+++ b/src/include/kompute/operations/OpAlgoDispatch.hpp
@ -17,6 +17,13 @@ class OpAlgoDispatch : public OpBase
 {
  public:

+    /**
+     * Constructor that stores the algorithm to use as well as the relevant
+     * push constants to override when recording.
+     *
+     * @param algorithm The algorithm object to use for dispatch
+     * @param pushConstants The push constants to use for override
+     */
    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
            const kp::Constants& pushConstants = {});

@ -33,18 +40,22 @@ class OpAlgoDispatch : public OpBase
     * shader processing to the gpu. This function also records the GPU memory
     * copy of the output data for the staging buffer so it can be read by the
     * host.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
-     * Executes after the recorded commands are submitted, and performs a copy
-     * of the GPU Device memory into the staging buffer so the output data can
-     * be retrieved.
+     * Does not perform any postEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

--- a/src/include/kompute/operations/OpBase.hpp
+++ b/src/include/kompute/operations/OpBase.hpp
@ -32,6 +32,8 @@ class OpBase
     * The record function is intended to only send a record command or run
     * commands that are expected to record operations that are to be submitted
     * as a batch into the GPU.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void record(const vk::CommandBuffer& commandBuffer) = 0;

@ -42,6 +44,8 @@ class OpBase
     * there are situations where eval can be called multiple times, so the 
     * resources that are created should be idempotent in case it's called multiple
     * times in a row.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;

@ -52,6 +56,8 @@ class OpBase
     * there are situations where eval can be called multiple times, so the 
     * resources that are destroyed should not require a re-init unless explicitly
     * provided by the user.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
 };
--- a/src/include/kompute/operations/OpMult.hpp
+++ b/src/include/kompute/operations/OpMult.hpp
@ -26,11 +26,9 @@ class OpMult : public OpAlgoDispatch
     * requirements for the operations to be able to create and manage their
     * sub-components.
     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that are to be used in this operation
-     * @param komputeWorkgroup Optional parameter to specify the layout for processing
+     * @param algorithm An algorithm that will be overridden with the OpMult
+     * shader data and the tensors provided which are expected to be 3
     */
    OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
        : OpAlgoDispatch(algorithm)
--- a/src/include/kompute/operations/OpTensorCopy.hpp
+++ b/src/include/kompute/operations/OpTensorCopy.hpp
@ -9,38 +9,47 @@
 namespace kp {

 /**
-    Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type 
+ * Operation that copies the data from the first tensor to the rest of the tensors 
+ * provided, using a record command for all the vectors. This operation does not 
+ * own/manage the memory of the tensors passed to it. The operation must only 
+ * receive tensors of type 
 */
 class OpTensorCopy : public OpBase
 {
  public:
    /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation.
     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
-     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
+     * Default destructor. This class does not manage memory so it won't be 
+     * expecting the parent to perform a release.
     */
    ~OpTensorCopy() override;

    /**
-     * Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
+     * Records the copy commands from the first tensor into all the other 
+     * tensors provided. Also optionally records a barrier.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Copies the local vectors for all the tensors to sync the data with the gpu.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

--- a/src/include/kompute/operations/OpTensorSyncDevice.hpp
+++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp
@ -8,17 +8,20 @@
 namespace kp {

 /**
-    Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
+ * Operation that syncs tensor's device by mapping local data into the device memory. 
+ * For TensorTypes::eDevice it will use a record operation for the memory to be synced
+ * into GPU memory which means that the operation will be done in sync with GPU commands. 
+ * For TensorTypes::eHost it will only map the data into host memory which will 
+ * happen during preEval before the recorded commands are dispatched.
 */
 class OpTensorSyncDevice : public OpBase
 {
  public:
    /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation. The tensors provided cannot
+     * be of type TensorTypes::eStorage.
     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
@ -29,17 +32,24 @@ class OpTensorSyncDevice : public OpBase
    ~OpTensorSyncDevice() override;

    /**
-     * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
+     * For device tensors, it records the copy command for the tensor to copy the 
+     * data from its staging to device memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

--- a/src/include/kompute/operations/OpTensorSyncLocal.hpp
+++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp
@ -9,38 +9,50 @@
 namespace kp {

 /**
-    Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
+ * Operation that syncs tensor's local memory by mapping device data into the 
+ * local CPU memory. For TensorTypes::eDevice it will use a record operation 
+ * for the memory to be synced from GPU memory which means that the operation
+ * will be done in sync with GPU commands. For TensorTypes::eHost it will 
+ * only map the data into host memory which will happen during postEval after
+ * the recorded commands are submitted.
 */
 class OpTensorSyncLocal : public OpBase
 {
  public:
    /**
-     * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
+     * Default constructor with parameters that provides the core vulkan resources 
+     * and the tensors that will be used in the operation. The tensors provided 
+     * cannot be of type TensorTypes::eStorage.
     *
-     * @param physicalDevice Vulkan physical device used to find device queues
-     * @param device Vulkan logical device for passing to Algorithm
-     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
-     * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
+     * Default destructor. This class does not manage memory so it won't be expecting 
+     * the parent to perform a release.
     */
    ~OpTensorSyncLocal() override;

    /**
-     * For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
+     * For device tensors, it records the copy command for the tensor to copy the 
+     * data from its device to staging memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * For host tensors it performs the map command from the host memory into local memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;