Merge branch 'master' into timestamps

This commit is contained in:
alexander-g 2021-03-06 13:18:21 +01:00
commit eb47d52047
26 changed files with 1189 additions and 816 deletions

View file

@ -14,31 +14,46 @@ class Algorithm
{
public:
/**
* Default constructor for Algorithm
* Main constructor for algorithm with configuration parameters to create
* the underlying resources.
*
* @param device The Vulkan device to use for creating resources
* @param commandBuffer The vulkan command buffer to bind the pipeline and
* shaders
* @param tensors (optional) The tensors to use to create the descriptor resources
* @param spirv (optional) The spirv code to use to create the algorithm
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to initialize
* the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when initializing the
* pipeline, which set the size of the push constants - these can be modified but
* all new values must have the same vector size as this initial value.
*/
Algorithm(std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Initialiser for the shader data provided to the algorithm as well as
* tensor parameters that will be used in shader.
* Rebuild function to reconstruct algorithm with configuration parameters to create
* the underlying resources.
*
* @param shaderFileData The bytes in spir-v format of the shader
* @tensorParams The Tensors to be used in the Algorithm / shader for
* @specalizationInstalces The specialization parameters to pass to the
* function processing
* @param tensors The tensors to use to create the descriptor resources
* @param spirv The spirv code to use to create the algorithm
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to initialize
* the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when initializing the
* pipeline, which set the size of the push constants - these can be modified but
* all new values must have the same vector size as this initial value.
*/
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Destructor for Algorithm which is responsible for freeing and destroying
@ -50,23 +65,77 @@ class Algorithm
* Records the dispatch function with the provided template parameters or
* alternatively using the size of the tensor by default.
*
* @param x Layout X dispatch value
* @param y Layout Y dispatch value
* @param z Layout Z dispatch value
* @param commandBuffer Command buffer to record the algorithm resources to
*/
void recordDispatch(const vk::CommandBuffer& commandBuffer);
void bindCore(const vk::CommandBuffer& commandBuffer);
/**
* Records command that binds the "core" algorithm components which consist of
* binding the pipeline and binding the descriptorsets.
*
* @param commandBuffer Command buffer to record the algorithm resources to
*/
void recordBindCore(const vk::CommandBuffer& commandBuffer);
void bindPush(const vk::CommandBuffer& commandBuffer,
const Constants& pushConstants);
/**
* Records command that binds the push constants to the command buffer provided
* - it is required that the pushConstants provided are of the same size as the
* ones provided during initialization.
*
* @param commandBuffer Command buffer to record the algorithm resources to
*/
void recordBindPush(const vk::CommandBuffer& commandBuffer);
/**
* function that checks all the gpu resource components to verify if these have
* been created and returns true if all are valid.
*
* @returns returns true if the algorithm is currently initialized.
*/
bool isInit();
/**
* Sets the work group to use in the recordDispatch
*
* @param workgroup The kp::Workgroup value to use to update the algorithm. It
* must have a value greater than 1 on the x value (index 1) otherwise it will
* be initialized on the size of the first tensor (ie. this->mTensor[0]->size())
*/
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
/**
* Sets the push constants to the new value provided to use in the next bindPush()
*
* @param pushConstants The kp::Constants to use to set the push constants to use in the next
* bindPush(...) calls. The constants provided must be of the same size as the
* ones created during initialization.
*/
void setPush(const Constants& pushConstants);
/**
* Gets the current workgroup from the algorithm.
*
* @returns The kp::Workgroup currently set for the algorithm
*/
const Workgroup& getWorkgroup();
/**
* Gets the specialization constants of the current algorithm.
*
* @returns The kp::Constants currently set for specialization constants
*/
const Constants& getSpecializationConstants();
/**
* Gets the push constants of the current algorithm.
*
* @returns The kp::Constants currently set for push constants
*/
const Constants& getPush();
/**
* Gets the current tensors that are used in the algorithm.
*
* @returns The list of tensors used in the algorithm.
*/
const std::vector<std::shared_ptr<Tensor>>& getTensors();
void destroy();
@ -95,10 +164,9 @@ class Algorithm
// -------------- ALWAYS OWNED RESOURCES
std::vector<uint32_t> mSpirv;
Constants mSpecializationConstants;
Constants mPushConstants;
Workgroup mWorkgroup;
bool mIsInit;
// Create util functions
void createShaderModule();
void createPipeline();

View file

@ -24,18 +24,17 @@ class Manager
Manager();
/**
* Similar to base constructor but allows the user to provide the device
* they would like to create the resources on.
* Similar to base constructor but allows for further configuration to use when
* creating the Vulkan resources.
*
* @param physicalDeviceIndex The index of the physical device to use
* @param manageResources (Optional) Whether to manage the memory of the
* resources created and destroy when the manager is destroyed.
* @param familyQueueIndices (Optional) List of queue indices to add for
* explicit allocation
* @param totalQueues The total number of compute queues to create.
* @param desiredExtensions The desired extensions to load from physicalDevice
*/
Manager(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices = {});
const std::vector<uint32_t>& familyQueueIndices = {},
const std::vector<std::string>& desiredExtensions = {});
/**
* Manager constructor which allows your own vulkan application to integrate
@ -57,41 +56,57 @@ class Manager
~Manager();
/**
* Get or create a managed Sequence that will be contained by this manager.
* If the named sequence does not currently exist, it would be created and
* initialised.
* Create a managed sequence that will be destroyed by this manager
* if it hasn't been destroyed by its reference count going to zero.
*
* @param sequenceName The name for the named sequence to be retrieved or
* created
* @param queueIndex The queue to use from the available queues
* @param nrOfTimestamps The maximum number of timestamps to allocate.
* If zero (default), disables latching of timestamps.
* @return Shared pointer to the manager owned sequence resource
* @returns Shared pointer with initialised sequence
*/
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t nrOfTimestamps = 0);
/**
* Function that simplifies the common workflow of tensor creation and
* initialization. It will take the constructor parameters for a Tensor
* and will use it to create a new Tensor and then initialise it. The
* tensor memory will then be managed and owned by the manager.
* Create a managed tensor that will be destroyed by this manager
* if it hasn't been destroyed by its reference count going to zero.
*
* @param data The data to initialize the tensor with
* @param tensorType The type of tensor to initialize
* @param syncDataToGPU Whether to sync the data to GPU memory
* @returns Initialized Tensor with memory Syncd to GPU device
* @returns Shared pointer with initialised tensor
*/
std::shared_ptr<Tensor> tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
/**
* Create a managed algorithm that will be destroyed by this manager
* if it hasn't been destroyed by its reference count going to zero.
*
* @param tensors (optional) The tensors to initialise the algorithm with
* @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
* @param workgroup (optional) kp::Workgroup for algorithm to use, and
* defaults to (tensor[0].size(), 1, 1)
* @param specializationConstants (optional) kp::Constant to use for
* specialization constants, and defaults to an empty constant
* @param pushConstants (optional) kp::Constant to use for push constants,
* and defaults to an empty constant
* @returns Shared pointer with initialised algorithm
*/
std::shared_ptr<Algorithm> algorithm(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
const Constants& specializationConstants = {},
const Constants& pushConstants = {});
/**
* Destroy the GPU resources and all managed resources by manager.
**/
void destroy();
/**
* Run a pseudo-garbage collection to release all the managed resources
* that have been already freed due to these reaching to zero ref count.
**/
void clear();
private:
@ -122,7 +137,8 @@ class Manager
// Create functions
void createInstance();
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
uint32_t hysicalDeviceIndex = 0);
uint32_t hysicalDeviceIndex = 0,
const std::vector<std::string>& desiredExtensions = {});
};
} // End namespace kp

View file

@ -1,47 +0,0 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
namespace kp {
class Algorithm
{
public:
Algorithm();
Algorithm(std::shared_ptr<vk::Device> device);
void init(std::string shaderFilePath,
std::vector<std::shared_ptr<Tensor>> tensorParams);
~Algorithm();
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::Device> mDevice;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
bool mFreeDescriptorSetLayout = false;
std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
bool mFreeDescriptorPool = false;
std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
bool mFreeDescriptorSet = false;
std::shared_ptr<vk::ShaderModule> mShaderModule;
bool mFreeShaderModule = false;
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
bool mFreePipelineLayout = false;
std::shared_ptr<vk::PipelineCache> mPipelineCache;
bool mFreePipelineCache = false;
std::shared_ptr<vk::Pipeline> mPipeline;
bool mFreePipeline = false;
// Create util functions
void createParameters();
void createShaderModule(std::string shaderFilePath);
void createPipeline();
};
} // End namespace kp

View file

@ -35,6 +35,14 @@ class Sequence : public std::enable_shared_from_this<Sequence>
~Sequence();
/**
* Record function for operation to be added to the GPU queue in batch. This
* template requires classes to be derived from the OpBase class. This
* function also requires the Sequence to be recording, otherwise it will
* not be able to add the operation.
*
* @param op Object derived from kp::OpBase that will be recorded by the sequence
* which will be used when the operation is evaluated.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
@ -47,6 +55,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
* @param tensors Vector of tensors to use for the operation
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> record(
@ -55,6 +64,18 @@ class Sequence : public std::enable_shared_from_this<Sequence>
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->record(op);
}
/**
* Record function for operation to be added to the GPU queue in batch. This
* template requires classes to be derived from the OpBase class. This
* function also requires the Sequence to be recording, otherwise it will
* not be able to add the operation.
*
* @param algorithm Algorithm to use for the record often used for OpAlgo
* operations
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
@ -66,21 +87,29 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
* operations into the gpu as a submit job synchronously (with a barrier).
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> eval();
/**
* Resets all the recorded and stored operations, records the operation
* provided and submits into the gpu as a submit job synchronously (with a barrier).
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* @param tensors Vector of tensors to use for the operation
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
// TODO: Aim to have only a single function with tensors/algorithm
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
@ -88,6 +117,16 @@ class Sequence : public std::enable_shared_from_this<Sequence>
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->eval(op);
}
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* @param algorithm Algorithm to use for the record often used for OpAlgo
* operations
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
@ -99,18 +138,27 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval Async sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier. EvalAwait() must
* be called after to ensure the sequence is terminated correctly.
* operations into the gpu as a submit job without a barrier. EvalAwait() must
* ALWAYS be called after to ensure the sequence is terminated correctly.
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> evalAsync();
/**
* Clears current operations to record provided one in the vector of
* operations into the gpu as a submit job without a barrier. EvalAwait() must
* ALWAYS be called after to ensure the sequence is terminated correctly.
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* @param tensors Vector of tensors to use for the operation
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
@ -121,6 +169,16 @@ class Sequence : public std::enable_shared_from_this<Sequence>
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->evalAsync(op);
}
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* @param algorithm Algorithm to use for the record often used for OpAlgo
* operations
* @param TArgs Template parameters that are used to initialise operation
* which allows for extensible configurations on initialisation.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
@ -135,7 +193,7 @@ class Sequence : public std::enable_shared_from_this<Sequence>
* finishes, it runs the postEval of all operations.
*
* @param waitFor Number of milliseconds to wait before timing out.
* @return Boolean stating whether execution was successful.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
@ -174,8 +232,19 @@ class Sequence : public std::enable_shared_from_this<Sequence>
*/
bool isRecording();
/**
* Returns true if the sequence has been initialised, and it's based on the
* GPU resources being referenced.
*
* @return Boolean stating if is initialized
*/
bool isInit();
/**
* Clears command buffer and triggers re-record of all the current operations
* saved, which is useful if the underlying kp::Tensors or kp::Algorithms
* are modified and need to be re-recorded.
*/
void rerecord();
/**

View file

@ -12,124 +12,18 @@
namespace kp {
// The default resource limit for the GLSL compiler, can be overwritten
// Has been adobted by:
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
const TBuiltInResource defaultResource = {
/* .MaxLights = */ 0,
/* .MaxClipPlanes = */ 0,
/* .MaxTextureUnits = */ 0,
/* .MaxTextureCoords = */ 0,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 0,
/* .MaxCombinedTextureImageUnits = */ 0,
/* .MaxTextureImageUnits = */ 0,
/* .MaxFragmentUniformComponents = */ 0,
/* .MaxDrawBuffers = */ 0,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 0,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 0,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 0,
/* .MaxImageUnits = */ 0,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 0,
/* .MaxCombinedImageUniforms = */ 0,
/* .MaxGeometryTextureImageUnits = */ 0,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 0,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 0,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 0,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
/* .maxDualSourceDrawBuffersEXT = */ 1,
/* .limits = */
{
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}
};
/**
Shader utility class with functions to compile and process glsl files.
*/
class Shader
{
public:
// The default resource limit for the GLSL compiler, can be overwritten
// Has been adopted by:
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
const static TBuiltInResource defaultResource;
/**
* Compile multiple sources with optional filenames. Currently this function
* uses the glslang C++ interface which is not thread safe so this function
@ -150,7 +44,7 @@ class Shader
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const TBuiltInResource& resources = Shader::defaultResource);
/**
* Compile a single glslang source from string value. Currently this
@ -170,7 +64,7 @@ class Shader
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const TBuiltInResource& resources = Shader::defaultResource);
};
}

View file

@ -29,12 +29,14 @@ class Tensor
};
/**
* Default constructor with data provided which would be used to create the
* Constructor with data provided which would be used to create the
* respective vulkan buffer and memory.
*
* @param physicalDevice The physical device to use to fetch properties
* @param device The device to use to create the buffer and memory from
* @param data Non-zero-sized vector of data that will be used by the
* tensor
* @param tensorType Type for the tensor which is of type TensorTypes
* @param tensorTypes Type for the tensor which is of type TensorTypes
*/
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
@ -48,10 +50,11 @@ class Tensor
~Tensor();
/**
* Initialiser which calls the initialisation for all the respective tensors
* as well as creates the respective staging tensors. The staging tensors
* would only be created for the tensors of type TensorType::eDevice as
* otherwise there is no need to copy from host memory.
* Function to trigger reinitialisation of the tensor buffer and memory with
* new data as well as new potential device type.
*
* @param data Vector of data to use to initialise vector from
* @param tensorType The type to use for the tensor
*/
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
@ -61,6 +64,11 @@ class Tensor
*/
void destroy();
/**
* Check whether tensor is initialized based on the created gpu resources.
*
* @returns Boolean stating whether tensor is initialized
*/
bool isInit();
/**

View file

@ -17,6 +17,13 @@ class OpAlgoDispatch : public OpBase
{
public:
/**
* Constructor that stores the algorithm to use as well as the relevant
* push constants to override when recording.
*
* @param algorithm The algorithm object to use for dispatch
* @param pushConstants The push constants to use for override
*/
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
const kp::Constants& pushConstants = {});
@ -33,18 +40,22 @@ class OpAlgoDispatch : public OpBase
* shader processing to the gpu. This function also records the GPU memory
* copy of the output data for the staging buffer so it can be read by the
* host.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
* Does not perform any postEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

View file

@ -32,6 +32,8 @@ class OpBase
* The record function is intended to only send a record command or run
* commands that are expected to record operations that are to be submitted
* as a batch into the GPU.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
@ -42,6 +44,8 @@ class OpBase
* there are situations where eval can be called multiple times, so the
* resources that are created should be idempotent in case it's called multiple
* times in a row.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
@ -52,6 +56,8 @@ class OpBase
* there are situations where eval can be called multiple times, so the
* resources that are destroyed should not require a re-init unless explicitly
* provided by the user.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
};

View file

@ -26,11 +26,9 @@ class OpMult : public OpAlgoDispatch
* requirements for the operations to be able to create and manage their
* sub-components.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that are to be used in this operation
* @param komputeWorkgroup Optional parameter to specify the layout for processing
* @param algorithm An algorithm that will be overridden with the OpMult
* shader data and the tensors provided which are expected to be 3
*/
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
: OpAlgoDispatch(algorithm)

View file

@ -9,38 +9,47 @@
namespace kp {
/**
Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type
* Operation that copies the data from the first tensor to the rest of the tensors
* provided, using a record command for all the vectors. This operation does not
* own/manage the memory of the tensors passed to it. The operation must only
* receive tensors of type
*/
class OpTensorCopy : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation.
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation.
*
* @param physicalDevice Vulkan physical device used to find device queues
* @param device Vulkan logical device for passing to Algorithm
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors that will be used in the operation.
*/
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpTensorCopy() override;
/**
* Records the copy commands from the first tensor into all the other tensors provided. Also optionally records a barrier.
* Records the copy commands from the first tensor into all the other
* tensors provided. Also optionally records a barrier.
*
* @param commandBuffer The command buffer to record the command into.
*/
void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Copies the local vectors for all the tensors to sync the data with the gpu.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

View file

@ -8,17 +8,20 @@
namespace kp {
/**
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
* Operation that syncs tensor's device by mapping local data into the device memory.
* For TensorTypes::eDevice it will use a record operation for the memory to be syncd
* into GPU memory which means that the operation will be done in sync with GPU commands.
* For TensorTypes::eHost it will only map the data into host memory which will
* happen during preEval before the recorded commands are dispatched.
*/
class OpTensorSyncDevice : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation. The tensors provided cannot
* be of type TensorTypes::eStorage.
*
* @param tensors Tensors that will be used in the operation.
*/
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
@ -29,17 +32,24 @@ class OpTensorSyncDevice : public OpBase
~OpTensorSyncDevice() override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
* For device tensors, it records the copy command for the tensor to copy the
* data from its staging to device memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any postEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

View file

@ -9,38 +9,50 @@
namespace kp {
/**
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging.
* Operation that syncs the tensor's local memory by mapping device data into the
* local CPU memory. For TensorTypes::eDevice it will use a record operation
* for the memory to be synced from device memory into staging memory, which
* means that the operation will be done in sync with GPU commands. For
* TensorTypes::eHost it will only map the data from host memory into local
* memory, which happens during postEval (preEval performs no work for this
* operation).
*/
class OpTensorSyncLocal : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation. The tensors provided
* cannot be of type TensorTypes::eStorage.
*
* @param tensors Tensors that will be used in the operation.
*/
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
* Default destructor. This class does not manage memory so it won't be expecting
* the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
* For device tensors, it records the copy command for the tensor to copy the
* data from its device to staging memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
void record(const vk::CommandBuffer& commandBuffer) override;
/**
* Does not perform any preEval commands.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* For host tensors it performs the map command from the host memory into local memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;