Further tests added to new structure
This commit is contained in:
parent
3f1288271d
commit
6378583a23
17 changed files with 636 additions and 514 deletions
|
|
@ -45,10 +45,6 @@ public:
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
bool isInit();
|
||||
|
||||
void freeMemoryDestroyGPUResources();
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
* respective pipelines and owned parameter groups.
|
||||
|
|
@ -65,11 +61,21 @@ public:
|
|||
*/
|
||||
void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
|
||||
bool isInit();
|
||||
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
|
||||
const Workgroup& getWorkgroup();
|
||||
const Constants& getSpecializationConstants();
|
||||
const Constants& getPushConstants();
|
||||
const std::vector<std::shared_ptr<Tensor>>& getTensors();
|
||||
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
|
|
@ -100,7 +106,7 @@ private:
|
|||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createParameters();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ class Manager
|
|||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param manageResources (Optional) Whether to manage the memory of the
|
||||
* resources created and destroy when the manager is destroyed.
|
||||
* @param familyQueueIndices (Optional) List of queue indices to add for
|
||||
* explicit allocation
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
|
|
@ -48,8 +50,7 @@ class Manager
|
|||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex);
|
||||
std::shared_ptr<vk::Device> device);
|
||||
|
||||
/**
|
||||
* Manager destructor which would ensure all owned resources are destroyed
|
||||
|
|
@ -92,12 +93,14 @@ class Manager
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
void destroy();
|
||||
void clear();
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
uint32_t mPhysicalDeviceIndex = -1;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
|
||||
|
|
@ -109,7 +112,7 @@ class Manager
|
|||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
uint32_t mCurrentSequenceIndex = -1;
|
||||
bool mManageResources = false;
|
||||
|
||||
#if DEBUG
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
|
|
@ -120,7 +123,7 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -31,6 +31,10 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
~Sequence();
|
||||
|
||||
/**
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
|
|
@ -41,7 +45,148 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
// TODO: Aim to have only a single function with tensors/algorithm
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
// TODO: Aim to be able to handle errors when returning without throw except
|
||||
return this->eval(op);
|
||||
}
|
||||
// Needed as otherwise can't use initialiser list
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts recording again.
|
||||
|
|
@ -64,32 +209,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently recording.
|
||||
*
|
||||
|
|
@ -97,6 +216,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
bool isRecording();
|
||||
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
|
|
@ -109,7 +231,7 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
|
|
|||
|
|
@ -59,7 +59,9 @@ class Tensor
|
|||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
|
|
|
|||
|
|
@ -17,8 +17,7 @@ class OpAlgoDispatch : public OpBase
|
|||
{
|
||||
public:
|
||||
|
||||
OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -50,7 +49,6 @@ class OpAlgoDispatch : public OpBase
|
|||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue