Updated docstrings
This commit is contained in:
parent
b91c392f5e
commit
cb0d7f7cf3
7 changed files with 135 additions and 26 deletions
|
|
@ -55,3 +55,13 @@ add_custom_target(gensphinx ALL
|
|||
DEPENDS ${DOXYGEN_INDEX_FILE}
|
||||
COMMENT "Generating documentation with Sphinx")
|
||||
|
||||
# For completeness we also copy the output doxygen html files
|
||||
file(COPY ${DOXYGEN_OUTPUT_DIR}/html/
|
||||
DESTINATION ${SPHINX_BUILD}/doxygen/)
|
||||
#add_custom_target(includedoxygen ALL
|
||||
# COMMAND ${CMAKE_COMMAND}
|
||||
# -E copy_directory
|
||||
# ${DOXYGEN_OUTPUT_DIR}/html/
|
||||
# ${SPHINX_BUILD}/doxygen/
|
||||
# DEPENDS gensphinx)
|
||||
|
||||
|
|
|
|||
|
|
@ -74,8 +74,8 @@ std::weak_ptr<Sequence>
|
|||
Manager::getOrCreateManagedSequence(std::string sessionName)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager creating Sequence object");
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator
|
||||
found = this->mManagedSequences.find(sessionName);
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sessionName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq =
|
||||
|
|
@ -84,10 +84,9 @@ Manager::getOrCreateManagedSequence(std::string sessionName)
|
|||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
sq->init();
|
||||
this->mManagedSequences.insert({sessionName, sq});
|
||||
this->mManagedSequences.insert({ sessionName, sq });
|
||||
return sq;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,14 +31,14 @@ class Manager
|
|||
std::weak_ptr<Sequence> getOrCreateManagedSequence(std::string sessionName);
|
||||
|
||||
template<typename T, typename... TArgs>
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors, std::string sessionName = KP_DEFAULT_SESSION)
|
||||
void evalOp(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::string sessionName = KP_DEFAULT_SESSION)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp triggered");
|
||||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sessionName);
|
||||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sessionName);
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock())
|
||||
{
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -55,7 +55,6 @@ class Manager
|
|||
}
|
||||
|
||||
private:
|
||||
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
|
|
@ -66,7 +65,8 @@ class Manager
|
|||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
|
||||
// Always owned resources
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>> mManagedSequences;
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
mManagedSequences;
|
||||
|
||||
#if DEBUG
|
||||
vk::DebugReportCallbackEXT mDebugReportCallback;
|
||||
|
|
|
|||
|
|
@ -23,9 +23,15 @@ class OpBase
|
|||
OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); }
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
|
|
@ -69,22 +75,40 @@ class OpBase
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The init function is responsible for setting up all the resources and
|
||||
* should be called after the Operation has been created.
|
||||
*/
|
||||
virtual void init() = 0;
|
||||
|
||||
/**
|
||||
* The record function is intended to only send a record command or run
|
||||
* commands that are expected to record operations that are to be submitted
|
||||
* as a batch into the GPU.
|
||||
*/
|
||||
virtual void record() = 0;
|
||||
|
||||
/**
|
||||
* Post submit is called after the Sequence has submitted the commands to
|
||||
* the GPU for processing, and can be used to perform any tear-down steps
|
||||
* required as the computation iteration finishes.
|
||||
*/
|
||||
virtual void postSubmit() = 0;
|
||||
|
||||
protected:
|
||||
// Sometimes owned resources
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
bool mFreeTensors =
|
||||
false; // TODO: Provide granularity to specify which to free
|
||||
// OPTIONALLY OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>>
|
||||
mTensors; ///< Tensors referenced by operation that can be managed
|
||||
///< optionally by operation
|
||||
bool mFreeTensors = false; ///< Explicit boolean that specifies whether the
|
||||
///< tensors are freed (if they are managed)
|
||||
|
||||
// Always external resources
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
|
||||
// NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice>
|
||||
mPhysicalDevice; ///< Vulkan Physical Device
|
||||
std::shared_ptr<vk::Device> mDevice; ///< Vulkan Logical Device
|
||||
std::shared_ptr<vk::CommandBuffer>
|
||||
mCommandBuffer; ///< Vulkan Command Buffer
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -8,23 +8,55 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that creates tensor and manages the memory of the components
|
||||
created
|
||||
*/
|
||||
class OpCreateTensor : public OpBase
|
||||
{
|
||||
public:
|
||||
OpCreateTensor();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpCreateTensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = true);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
* the tensors
|
||||
*/
|
||||
~OpCreateTensor();
|
||||
|
||||
/**
|
||||
* In charge of initialising the primary Tensor as well as the staging
|
||||
* tensor as required. It will only initialise a staging tensor if the
|
||||
* Primary tensor is of type Device.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Records the copy command into the GPU memory from the staging or host
|
||||
* memory depending on the type of tensor.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Performs a copy back into the main tensor to ensure that the data
|
||||
* contained is the one that is now being stored in the GPU.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -14,29 +14,66 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Base algorithm based operation
|
||||
*/
|
||||
* Operation that performs multiplication on two tensors and outputs to a third
|
||||
* tensor. The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
Constructor
|
||||
*/
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
bool freeTensors = false);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult();
|
||||
|
||||
/**
|
||||
* The init function is responsible for ensuring that all of the tensors
|
||||
* provided are aligned with requirements such as LHS, RHS and Output
|
||||
* tensors, and creates the algorithm component which processes the
|
||||
* computation.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
|
|
@ -71,7 +108,6 @@ OpMult<tX, tY, tZ>::OpMult()
|
|||
SPDLOG_DEBUG("Kompute OpMult constructor base");
|
||||
}
|
||||
|
||||
// TODO: Remove physicalDevice from main initialiser
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,14 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Structured data used in GPU operations.
|
||||
*
|
||||
* Tensors are the base building block in Kompute to perform operations across
|
||||
* GPUs. Each tensor would have a respective Vulkan memory and buffer, which
|
||||
* would be used to store their respective data. The tensors can be used for GPU
|
||||
* data storage or transfer.
|
||||
*/
|
||||
class Tensor
|
||||
{
|
||||
public:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue