Further tests added to new structure
This commit is contained in:
parent
3f1288271d
commit
6378583a23
17 changed files with 636 additions and 514 deletions
|
|
@ -10,5 +10,6 @@
|
|||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
|
|
|||
|
|
@ -928,7 +928,9 @@ class Tensor
|
|||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
|
|
@ -1129,10 +1131,6 @@ public:
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
bool isInit();
|
||||
|
||||
void freeMemoryDestroyGPUResources();
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
* respective pipelines and owned parameter groups.
|
||||
|
|
@ -1149,11 +1147,21 @@ public:
|
|||
*/
|
||||
void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
|
||||
bool isInit();
|
||||
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
|
||||
const Workgroup& getWorkgroup();
|
||||
const Constants& getSpecializationConstants();
|
||||
const Constants& getPushConstants();
|
||||
const std::vector<std::shared_ptr<Tensor>>& getTensors();
|
||||
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
|
|
@ -1184,7 +1192,7 @@ private:
|
|||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createParameters();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -1270,6 +1278,10 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
~Sequence();
|
||||
|
||||
/**
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
|
|
@ -1280,7 +1292,146 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts recording again.
|
||||
|
|
@ -1303,32 +1454,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently in recording activated.
|
||||
*
|
||||
|
|
@ -1336,6 +1461,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
bool isRecording();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
|
|
@ -1348,7 +1475,7 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
|
@ -1444,6 +1571,8 @@ class Manager
|
|||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param manageResources (Optional) Whether to manage the memory of the
|
||||
* resources created and destroy when the manager is destroyed.
|
||||
* @param familyQueueIndices (Optional) List of queue indices to add for
|
||||
* explicit allocation
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
|
|
@ -1462,8 +1591,7 @@ class Manager
|
|||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex);
|
||||
std::shared_ptr<vk::Device> device);
|
||||
|
||||
/**
|
||||
* Manager destructor which would ensure all owned resources are destroyed
|
||||
|
|
@ -1506,12 +1634,14 @@ class Manager
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
void destroy();
|
||||
void clear();
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
uint32_t mPhysicalDeviceIndex = -1;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
|
||||
|
|
@ -1523,7 +1653,7 @@ class Manager
|
|||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
uint32_t mCurrentSequenceIndex = -1;
|
||||
bool mManageResources = false;
|
||||
|
||||
#if DEBUG
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
|
|
@ -1534,7 +1664,7 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
|
||||
// physicalDeviceIndex selects the entry of the enumerated physical devices
// that the device is created on.
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
@ -1553,8 +1683,7 @@ class OpAlgoDispatch : public OpBase
|
|||
{
|
||||
public:
|
||||
|
||||
OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -1586,7 +1715,6 @@ class OpAlgoDispatch : public OpBase
|
|||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Algorithm::~Algorithm()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destructor started");
|
||||
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -35,23 +35,35 @@ Algorithm::rebuild(
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->setWorkgroup(workgroup);
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
this->mSpecializationConstants = specializationConstants;
|
||||
this->mPushConstants = pushConstants;
|
||||
this->setWorkgroup(workgroup);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before rebuild
|
||||
if (this->mFreeDescriptorPool) {
|
||||
this->freeMemoryDestroyGPUResources();
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
this->createParameters(tensors);
|
||||
this->createParameters();
|
||||
this->createShaderModule();
|
||||
this->createPipeline();
|
||||
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit() {
|
||||
return this->mPipeline &&
|
||||
this->mPipelineCache &&
|
||||
this->mPipelineLayout &&
|
||||
this->mDescriptorPool &&
|
||||
this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout &&
|
||||
this->mShaderModule;
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::freeMemoryDestroyGPUResources() {
|
||||
Algorithm::destroy() {
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN(
|
||||
|
|
@ -68,6 +80,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mPipeline,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipeline = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineCache) {
|
||||
|
|
@ -79,6 +92,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mPipelineCache,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineCache = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineLayout) {
|
||||
|
|
@ -90,6 +104,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mPipelineLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeShaderModule) {
|
||||
|
|
@ -101,6 +116,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mShaderModule,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mShaderModule = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeDescriptorSet) {
|
||||
|
|
@ -111,6 +127,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
}
|
||||
this->mDevice->freeDescriptorSets(
|
||||
*this->mDescriptorPool, 1, this->mDescriptorSet.get());
|
||||
this->mDescriptorSet = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeDescriptorSetLayout) {
|
||||
|
|
@ -122,6 +139,7 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mDescriptorSetLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorSetLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeDescriptorPool) {
|
||||
|
|
@ -133,18 +151,19 @@ Algorithm::freeMemoryDestroyGPUResources() {
|
|||
this->mDevice->destroy(
|
||||
*this->mDescriptorPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorPool = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams)
|
||||
Algorithm::createParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
|
||||
|
||||
std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
|
||||
vk::DescriptorPoolSize(
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
static_cast<uint32_t>(tensorParams.size()) // Descriptor count
|
||||
static_cast<uint32_t>(this->mTensors.size()) // Descriptor count
|
||||
)
|
||||
};
|
||||
|
||||
|
|
@ -161,7 +180,7 @@ Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorPa
|
|||
this->mFreeDescriptorPool = true;
|
||||
|
||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
|
||||
for (size_t i = 0; i < tensorParams.size(); i++) {
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
descriptorSetBindings.push_back(
|
||||
vk::DescriptorSetLayoutBinding(i, // Binding index
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
|
|
@ -193,11 +212,11 @@ Algorithm::createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorPa
|
|||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < tensorParams.size(); i++) {
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
tensorParams[i]->constructDescriptorBufferInfo();
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
|
|
@ -377,4 +396,24 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Read-only accessor for the workgroup stored in this algorithm.
 *
 * @return Const reference to the mWorkgroup member
 */
const Workgroup&
Algorithm::getWorkgroup()
{
    return mWorkgroup;
}
|
||||
|
||||
/**
 * Read-only accessor for the specialization constants stored in this
 * algorithm.
 *
 * @return Const reference to the mSpecializationConstants member
 */
const Constants&
Algorithm::getSpecializationConstants()
{
    return mSpecializationConstants;
}
|
||||
|
||||
/**
 * Read-only accessor for the push constants stored in this algorithm.
 *
 * @return Const reference to the mPushConstants member
 */
const Constants&
Algorithm::getPushConstants()
{
    return mPushConstants;
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors() {
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,26 +33,33 @@ Manager::Manager()
|
|||
Manager::Manager(uint32_t physicalDeviceIndex,
|
||||
const std::vector<uint32_t>& familyQueueIndices)
|
||||
{
|
||||
this->mPhysicalDeviceIndex = physicalDeviceIndex;
|
||||
this->mManageResources = false;
|
||||
|
||||
this->createInstance();
|
||||
this->createDevice(familyQueueIndices);
|
||||
this->createDevice(familyQueueIndices, physicalDeviceIndex);
|
||||
}
|
||||
|
||||
Manager::Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex)
|
||||
std::shared_ptr<vk::Device> device)
|
||||
{
|
||||
this->mManageResources = true;
|
||||
|
||||
this->mInstance = instance;
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mPhysicalDeviceIndex = physicalDeviceIndex;
|
||||
}
|
||||
|
||||
/**
 * Destructor: logs the call and delegates the teardown to destroy().
 */
Manager::~Manager()
{
    KP_LOG_DEBUG("Kompute Manager Destructor started");
    destroy();
}
|
||||
|
||||
void
|
||||
Manager::destroy() {
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager destroy() started");
|
||||
|
||||
if (this->mDevice == nullptr) {
|
||||
KP_LOG_ERROR(
|
||||
|
|
@ -60,32 +67,32 @@ Manager::~Manager()
|
|||
return;
|
||||
}
|
||||
|
||||
if (this->mManagedSequences.size()) {
|
||||
if (this->mManageResources && this->mManagedSequences.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
|
||||
"managed sequences");
|
||||
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
|
||||
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
|
||||
sq->freeMemoryDestroyGPUResources();
|
||||
sq->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedSequences.clear();
|
||||
}
|
||||
|
||||
if (this->mManagedAlgorithms.size()) {
|
||||
if (this->mManageResources && this->mManagedAlgorithms.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm : this->mManagedAlgorithms) {
|
||||
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
|
||||
algorithm->freeMemoryDestroyGPUResources();
|
||||
algorithm->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedAlgorithms.clear();
|
||||
}
|
||||
|
||||
if (this->mManagedTensors.size()) {
|
||||
if (this->mManageResources && this->mManagedTensors.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
|
||||
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
|
||||
tensor->freeMemoryDestroyGPUResources();
|
||||
tensor->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedTensors.clear();
|
||||
|
|
@ -95,6 +102,7 @@ Manager::~Manager()
|
|||
KP_LOG_INFO("Destroying device");
|
||||
this->mDevice->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDevice = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Device");
|
||||
}
|
||||
|
||||
|
|
@ -109,6 +117,7 @@ Manager::~Manager()
|
|||
if (this->mDebugReportCallback) {
    this->mInstance->destroyDebugReportCallbackEXT(
      this->mDebugReportCallback, nullptr, this->mDebugDispatcher);
    // mInstance must NOT be reset here: only the debug callback was
    // destroyed, and the instance pointer is dereferenced again by the
    // mFreeInstance teardown that follows. Nulling it at this point made
    // that later destroy() call go through a null pointer.
    KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback");
}
|
||||
#endif
|
||||
|
|
@ -117,6 +126,7 @@ Manager::~Manager()
|
|||
if (this->mFreeInstance) {
|
||||
this->mInstance->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mInstance = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
|
||||
}
|
||||
}
|
||||
|
|
@ -207,7 +217,31 @@ Manager::createInstance()
|
|||
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
||||
Manager::clear()
{
    // Only prune the tracking lists when resource management is enabled.
    if (!this->mManageResources) {
        return;
    }

    // For each list: move the expired weak references to the tail, then
    // erase that tail so only references to live objects remain.
    auto deadTensors =
      std::remove_if(this->mManagedTensors.begin(),
                     this->mManagedTensors.end(),
                     [](std::weak_ptr<Tensor> ref) { return ref.expired(); });
    this->mManagedTensors.erase(deadTensors, this->mManagedTensors.end());

    auto deadAlgorithms =
      std::remove_if(this->mManagedAlgorithms.begin(),
                     this->mManagedAlgorithms.end(),
                     [](std::weak_ptr<Algorithm> ref) { return ref.expired(); });
    this->mManagedAlgorithms.erase(deadAlgorithms,
                                   this->mManagedAlgorithms.end());

    auto deadSequences =
      std::remove_if(this->mManagedSequences.begin(),
                     this->mManagedSequences.end(),
                     [](std::weak_ptr<Sequence> ref) { return ref.expired(); });
    this->mManagedSequences.erase(deadSequences,
                                  this->mManagedSequences.end());
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t physicalDeviceIndex)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating Device");
|
||||
|
|
@ -215,7 +249,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
if (this->mInstance == nullptr) {
|
||||
throw std::runtime_error("Kompute Manager instance is null");
|
||||
}
|
||||
if (this->mPhysicalDeviceIndex < 0) {
|
||||
if (physicalDeviceIndex < 0) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Manager physical device index not provided");
|
||||
}
|
||||
|
|
@ -226,7 +260,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
this->mInstance->enumeratePhysicalDevices();
|
||||
|
||||
vk::PhysicalDevice physicalDevice =
|
||||
physicalDevices[this->mPhysicalDeviceIndex];
|
||||
physicalDevices[physicalDeviceIndex];
|
||||
|
||||
this->mPhysicalDevice =
|
||||
std::make_shared<vk::PhysicalDevice>(physicalDevice);
|
||||
|
|
@ -235,7 +269,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices)
|
|||
physicalDevice.getProperties();
|
||||
|
||||
KP_LOG_INFO("Using physical device index {} found {}",
|
||||
this->mPhysicalDeviceIndex,
|
||||
physicalDeviceIndex,
|
||||
physicalDeviceProperties.deviceName);
|
||||
|
||||
if (!familyQueueIndices.size()) {
|
||||
|
|
@ -321,7 +355,9 @@ Manager::tensor(
|
|||
std::shared_ptr<Tensor> tensor{
|
||||
new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
|
@ -345,7 +381,9 @@ Manager::algorithm(
|
|||
specializationConstants,
|
||||
pushConstants)};
|
||||
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
return algorithm;
|
||||
}
|
||||
|
|
@ -362,7 +400,9 @@ Manager::sequence(uint32_t queueIndex)
|
|||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]) };
|
||||
|
||||
this->mManagedSequences.push_back(sq);
|
||||
if (this->mManageResources) {
|
||||
this->mManagedSequences.push_back(sq);
|
||||
}
|
||||
|
||||
return sq;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,12 +4,10 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm)
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mAlgorithm = algorithm;
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +22,7 @@ OpAlgoDispatch::record(std::shared_ptr<vk::CommandBuffer> commandBuffer)
|
|||
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (std::shared_ptr<Tensor> tensor : this->mTensors) {
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mAlgorithm->getTensors()) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
|
|
|
|||
|
|
@ -61,6 +61,12 @@ Sequence::end()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::clear() {
|
||||
KP_LOG_DEBUG("Kompute Sequence calling clear");
|
||||
this->end();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval()
|
||||
{
|
||||
|
|
@ -69,6 +75,13 @@ Sequence::eval()
|
|||
return this->evalAsync()->evalAwait();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval(std::shared_ptr<OpBase> op) {
|
||||
this->clear();
|
||||
this->record(op);
|
||||
this->eval();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync()
|
||||
{
|
||||
|
|
@ -138,8 +151,16 @@ Sequence::isRecording()
|
|||
return this->mRecording;
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::isInit() {
|
||||
return this->mDevice &&
|
||||
this->mCommandPool &&
|
||||
this->mCommandBuffer &&
|
||||
this->mComputeQueue;
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::freeMemoryDestroyGPUResources()
|
||||
Sequence::destroy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called");
|
||||
|
||||
|
|
@ -189,6 +210,16 @@ Sequence::freeMemoryDestroyGPUResources()
|
|||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
if (this->mDevice) {
|
||||
this->mDevice = nullptr;
|
||||
}
|
||||
if (this->mPhysicalDevice) {
|
||||
this->mPhysicalDevice = nullptr;
|
||||
}
|
||||
if (this->mComputeQueue) {
|
||||
this->mComputeQueue = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
|
|
|
|||
|
|
@ -76,6 +76,15 @@ Tensor::tensorType()
|
|||
return this->mTensorType;
|
||||
}
|
||||
|
||||
bool
|
||||
Tensor::isInit() {
|
||||
return this->mDevice &&
|
||||
this->mPrimaryBuffer &&
|
||||
this->mPrimaryMemory &&
|
||||
this->mStagingBuffer &&
|
||||
this->mStagingMemory;
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::setData(const std::vector<float>& data)
|
||||
{
|
||||
|
|
@ -429,7 +438,7 @@ Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::freeMemoryDestroyGPUResources()
|
||||
Tensor::destroy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()");
|
||||
|
||||
|
|
@ -495,6 +504,10 @@ Tensor::freeMemoryDestroyGPUResources()
|
|||
}
|
||||
}
|
||||
|
||||
if (this->mDevice) {
|
||||
this->mDevice = nullptr;
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,10 +45,6 @@ public:
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
bool isInit();
|
||||
|
||||
void freeMemoryDestroyGPUResources();
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
* respective pipelines and owned parameter groups.
|
||||
|
|
@ -65,11 +61,21 @@ public:
|
|||
*/
|
||||
void recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer);
|
||||
|
||||
bool isInit();
|
||||
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
|
||||
const Workgroup& getWorkgroup();
|
||||
const Constants& getSpecializationConstants();
|
||||
const Constants& getPushConstants();
|
||||
const std::vector<std::shared_ptr<Tensor>>& getTensors();
|
||||
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
|
||||
|
|
@ -100,7 +106,7 @@ private:
|
|||
void createPipeline();
|
||||
|
||||
// Parameters
|
||||
void createParameters(const std::vector<std::shared_ptr<Tensor>>& tensorParams);
|
||||
void createParameters();
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ class Manager
|
|||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param manageResources (Optional) Whether to manage the memory of the
|
||||
* resources created and destroy when the manager is destroyed.
|
||||
* @param familyQueueIndices (Optional) List of queue indices to add for
|
||||
* explicit allocation
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
|
|
@ -48,8 +50,7 @@ class Manager
|
|||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
uint32_t physicalDeviceIndex);
|
||||
std::shared_ptr<vk::Device> device);
|
||||
|
||||
/**
|
||||
* Manager destructor which would ensure all owned resources are destroyed
|
||||
|
|
@ -92,12 +93,14 @@ class Manager
|
|||
const Constants& specializationConstants = {},
|
||||
const Constants& pushConstants = {});
|
||||
|
||||
void destroy();
|
||||
void clear();
|
||||
|
||||
private:
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::Instance> mInstance = nullptr;
|
||||
bool mFreeInstance = false;
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
uint32_t mPhysicalDeviceIndex = -1;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
|
||||
|
|
@ -109,7 +112,7 @@ class Manager
|
|||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
uint32_t mCurrentSequenceIndex = -1;
|
||||
bool mManageResources = false;
|
||||
|
||||
#if DEBUG
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
|
|
@ -120,7 +123,7 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {});
|
||||
// Creates the device used by the Manager.
// familyQueueIndices: (Optional) list of queue indices to add for explicit
// allocation. physicalDeviceIndex: index of the physical device to use
// (defaults to 0).
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -31,6 +31,10 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
~Sequence();
|
||||
|
||||
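/**
* Records the provided operation so it can be added to the GPU queue in batch.
*
* @param op Shared pointer to the OpBase derived operation to record
* @return shared_ptr<Sequence> (presumably the Sequence class itself)
*/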
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
|
|
@ -41,7 +45,148 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
// TODO: Aim to have only a single function with tensors/algorithm
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
// TODO: Aim to be able to handle errors when returning without throw except
|
||||
return this->eval(op);
|
||||
}
|
||||
// Needded as otherise can't use initialiser list
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence>, presumably the Sequence class itself as for eval()
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
static_assert(std::is_base_of<OpBase, T>::value,
|
||||
"Kompute Sequence record(...) template only valid with "
|
||||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return shared_ptr<Sequence>, presumably the Sequence class itself as for eval()
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts recording again.
|
||||
|
|
@ -64,32 +209,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier. EvalAwait() must
|
||||
* be called after to ensure the sequence is terminated correctly.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Returns true if the sequence currently has recording activated.
|
||||
*
|
||||
|
|
@ -97,6 +216,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
bool isRecording();
|
||||
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
|
|
@ -109,7 +231,7 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
|
|
|
|||
|
|
@ -59,7 +59,9 @@ class Tensor
|
|||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
*/
|
||||
void freeMemoryDestroyGPUResources();
|
||||
void destroy();
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Returns the vector of data currently contained by the Tensor. It is
|
||||
|
|
|
|||
|
|
@ -17,8 +17,7 @@ class OpAlgoDispatch : public OpBase
|
|||
{
|
||||
public:
|
||||
|
||||
OpAlgoDispatch(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
|
|
@ -50,7 +49,6 @@ class OpAlgoDispatch : public OpBase
|
|||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -11,8 +11,7 @@ else()
|
|||
endif()
|
||||
|
||||
file(GLOB test_kompute_CPP
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/TestMain.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/TestWorkgroup.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
|
||||
)
|
||||
|
||||
add_executable(test_kompute ${test_kompute_CPP})
|
||||
|
|
|
|||
|
|
@ -37,25 +37,32 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 100000000);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> inputsSyncB;
|
||||
std::vector<std::shared_ptr<kp::Algorithm>> algorithms;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
inputsSyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
inputsSyncB.push_back(mgr.tensor(data));
|
||||
algorithms.push_back(mgr.algorithm({ inputsSyncB[i] }, spirv));
|
||||
}
|
||||
|
||||
mgr.rebuild(inputsSyncB);
|
||||
sq->eval<kp::OpTensorSyncDevice>(inputsSyncB);
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(inputsSyncB);
|
||||
|
||||
auto startSync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgr.evalOpDefault<kp::OpAlgoCreate>(
|
||||
{ inputsSyncB[i] }, kp::Shader::compile_source(shader));
|
||||
sq->eval<kp::OpAlgoDispatch>(algorithms[i]);
|
||||
}
|
||||
|
||||
auto endSync = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -63,7 +70,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync)
|
||||
.count();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
sq->eval<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
|
||||
|
|
@ -74,26 +81,23 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
inputsAsyncB.push_back(std::make_shared<kp::Tensor>(kp::Tensor(data)));
|
||||
inputsAsyncB.push_back(mgr.tensor(data));
|
||||
}
|
||||
|
||||
mgrAsync.rebuild(inputsAsyncB);
|
||||
std::vector<std::shared_ptr<kp::Sequence>> sqs;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.sequence("async" + std::to_string(i), i);
|
||||
sqs.push_back(mgrAsync.sequence(i));
|
||||
}
|
||||
|
||||
auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.evalOpAsync<kp::OpAlgoCreate>(
|
||||
{ inputsAsyncB[i] },
|
||||
"async" + std::to_string(i),
|
||||
kp::Shader::compile_source(shader));
|
||||
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algorithms[i]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
mgrAsync.evalOpAwait("async" + std::to_string(i));
|
||||
sqs[i]->evalAwait();
|
||||
}
|
||||
|
||||
auto endAsync = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -101,7 +105,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
endAsync - startAsync)
|
||||
.count();
|
||||
|
||||
mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
|
||||
|
|
@ -138,32 +142,32 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(data);
|
||||
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(data);
|
||||
|
||||
mgr.sequence("asyncOne");
|
||||
mgr.sequence("asyncTwo");
|
||||
std::shared_ptr<kp::Sequence> sq1 = mgr.sequence();
|
||||
std::shared_ptr<kp::Sequence> sq2 = mgr.sequence();
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
std::vector<uint32_t> result = kp::Shader::compile_source(shader);
|
||||
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA});
|
||||
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB});
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "asyncOne", kp::Shader::compile_source(shader));
|
||||
sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
|
||||
sq2->evalAsync<kp::OpAlgoDispatch>(algo2);
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoCreate>(
|
||||
{ tensorB }, "asyncTwo", kp::Shader::compile_source(shader));
|
||||
sq1->evalAwait();
|
||||
sq2->evalAwait();
|
||||
|
||||
mgr.evalOpAwait("asyncOne");
|
||||
mgr.evalOpAwait("asyncTwo");
|
||||
|
||||
mgr.evalOpAsyncDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
mgr.evalOpAwaitDefault();
|
||||
sq1->evalAsync<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
sq1->evalAwait();
|
||||
|
||||
EXPECT_EQ(tensorA->data(), resultAsync);
|
||||
EXPECT_EQ(tensorB->data(), resultAsync);
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
TEST(TestDestroy, TestDestroyTensorSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -16,37 +16,36 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoCreate>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(tensorA);
|
||||
mgr.sequence()
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->eval()
|
||||
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
|
||||
|
||||
tensorA->destroy();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroyTensorVector)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
std::shared_ptr<kp::Tensor> tensorB = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -58,6 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
pa[index] = pa[index] + 1;
|
||||
pb[index] = pb[index] + 2;
|
||||
})");
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
|
@ -65,20 +65,20 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
tensorA = mgr.tensor({ 1, 1, 1 });
|
||||
tensorB = mgr.tensor({ 1, 1, 1 });
|
||||
|
||||
sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({tensorA, tensorB}, spirv);
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoCreate>(
|
||||
{ tensorA, tensorB }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(algo->getTensors())
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncDevice>(algo->getTensors())
|
||||
->eval();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
mgr.destroy({ tensorA, tensorB });
|
||||
tensorA->destroy();
|
||||
tensorB->destroy();
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
|
|
@ -88,32 +88,9 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroyTensorVectorUninitialised)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor({ 1, 1, 1 }) };
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA, tensorB });
|
||||
|
||||
mgr.destroy({ tensorA, tensorB });
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceSingle)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -124,26 +101,21 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
tensorA = mgr.tensor({0, 0, 0});
|
||||
|
||||
sq = mgr.sequence();
|
||||
|
||||
sq->begin();
|
||||
sq->record<kp::OpAlgoCreate>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(sq);
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({tensorA})
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({tensorA})
|
||||
->eval();
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
}
|
||||
|
|
@ -151,220 +123,3 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceVector)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq1 = nullptr;
|
||||
std::shared_ptr<kp::Sequence> sq2 = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
sq1 = mgr.sequence("One");
|
||||
sq1->begin();
|
||||
sq1->record<kp::OpAlgoCreate>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq1->end();
|
||||
sq1->eval();
|
||||
|
||||
sq2 = mgr.sequence("Two");
|
||||
sq2->begin();
|
||||
sq2->record<kp::OpAlgoCreate>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq2->end();
|
||||
sq2->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy({ sq1, sq2 });
|
||||
|
||||
EXPECT_FALSE(sq1->isInit());
|
||||
EXPECT_FALSE(sq2->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy("one");
|
||||
mgr.destroy("two");
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
std::shared_ptr<kp::Sequence> sq1 = nullptr;
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
sq1 = mgr.sequence("One");
|
||||
sq1->begin();
|
||||
sq1->record<kp::OpAlgoCreate>(
|
||||
{ tensorA }, kp::Shader::compile_source(shader));
|
||||
sq1->end();
|
||||
sq1->eval();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy("One");
|
||||
|
||||
EXPECT_FALSE(sq1->isInit());
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(std::vector<std::string>({"one", "two"}));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "one",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOp<kp::OpAlgoCreate>(
|
||||
{ tensorA }, "two",
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(std::vector<std::string>({"one", "two"}));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 2, 2, 2 }));
|
||||
}
|
||||
|
||||
TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
pa[index] = pa[index] + 1;
|
||||
})");
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
{
|
||||
mgr.rebuild({ tensorA });
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoCreate>(
|
||||
{ tensorA },
|
||||
kp::Shader::compile_source(shader));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
mgr.destroy(KP_DEFAULT_SESSION);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,47 +11,40 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor({ 0.001, 0.001 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor({ 0 }) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpTensorSyncDevice>({ wIn, bIn });
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpAlgoCreate>(
|
||||
params,
|
||||
std::vector<uint32_t>(
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
|
||||
kp::Workgroup(), kp::Constants({5.0}));
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
|
||||
|
||||
sq->end();
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algorithm)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -64,21 +57,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
bIn->data()[0] -= learningRate * bOut->data()[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
// * wj > 1.0
|
||||
// * b < 0
|
||||
// TODO: Add EXPECT_DOUBLE_EQ instead
|
||||
EXPECT_LT(wIn->data()[0], 0.01);
|
||||
EXPECT_GT(wIn->data()[1], 1.0);
|
||||
EXPECT_LT(bIn->data()[0], 0.0);
|
||||
|
||||
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
|
||||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
|
||||
TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
||||
|
|
@ -87,50 +80,43 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
uint32_t ITERATIONS = 100;
|
||||
float learningRate = 0.1;
|
||||
|
||||
kp::Constants wInVec = { 0.001, 0.001 };
|
||||
std::vector<float> bInVec = { 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) };
|
||||
std::shared_ptr<kp::Tensor> xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor(
|
||||
wInVec, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor(
|
||||
bInVec, kp::Tensor::TensorTypes::eHost) };
|
||||
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.rebuild(params);
|
||||
std::shared_ptr<kp::Tensor> xI = mgr.tensor({ 0, 1, 1, 1, 1 });
|
||||
std::shared_ptr<kp::Tensor> xJ = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
// Record op algo base
|
||||
sq->begin();
|
||||
std::shared_ptr<kp::Tensor> wIn = mgr.tensor(
|
||||
{ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
sq->record<kp::OpAlgoCreate>(
|
||||
params,
|
||||
std::vector<uint32_t>(
|
||||
std::shared_ptr<kp::Tensor> bIn = mgr.tensor(
|
||||
{ 0 },
|
||||
kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)),
|
||||
kp::Workgroup(), kp::Constants({5.0}));
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
|
||||
|
||||
sq->end();
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algorithm)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
||||
// Iterate across all expected iterations
|
||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||
|
|
@ -145,7 +131,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
wIn->mapDataIntoHostMemory();
|
||||
bIn->mapDataIntoHostMemory();
|
||||
}
|
||||
}
|
||||
|
||||
// Based on the inputs the outputs should be at least:
|
||||
// * wi < 0.01
|
||||
|
|
@ -160,4 +145,5 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
wIn->data()[0],
|
||||
wIn->data()[1],
|
||||
bIn->data()[0]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,9 +3,6 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
#include "kompute_test/shaders/shadertest_workgroup.hpp"
|
||||
|
||||
|
||||
TEST(TestWorkgroup, TestSimpleWorkgroup)
|
||||
{
|
||||
std::shared_ptr<kp::Tensor> tensorA = nullptr;
|
||||
|
|
@ -31,9 +28,9 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
|
|||
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(params, spirv, workgroup);
|
||||
|
||||
sq = mgr.sequence();
|
||||
sq->record(std::make_shared<kp::OpTensorSyncDevice>(params));
|
||||
sq->record(std::make_shared<kp::OpAlgoDispatch>(params, algorithm));
|
||||
sq->record(std::make_shared<kp::OpTensorSyncLocal>(params));
|
||||
sq->record<kp::OpTensorSyncDevice>(params);
|
||||
sq->record<kp::OpAlgoDispatch>(params, algorithm);
|
||||
sq->record<kp::OpTensorSyncLocal>(params);
|
||||
sq->eval();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue