diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index da1eaabf0..d47892cf1 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -10,5 +10,6 @@ #include "kompute/operations/OpTensorCopy.hpp" #include "kompute/operations/OpTensorSyncDevice.hpp" #include "kompute/operations/OpTensorSyncLocal.hpp" +#include "kompute/operations/OpAlgoDispatch.hpp" #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 629ff6a4b..01a2e7522 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -928,7 +928,9 @@ class Tensor /** * Destroys and frees the GPU resources which include the buffer and memory. */ - void freeMemoryDestroyGPUResources(); + void destroy(); + + bool isInit(); /** * Returns the vector of data currently contained by the Tensor. It is @@ -1129,10 +1131,6 @@ public: const Constants& specializationConstants = {}, const Constants& pushConstants = {}); - bool isInit(); - - void freeMemoryDestroyGPUResources(); - /** * Destructor for Algorithm which is responsible for freeing and desroying * respective pipelines and owned parameter groups. 
@@ -1149,11 +1147,21 @@ public: */ void recordDispatch(std::shared_ptr commandBuffer); + bool isInit(); + void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); + const Workgroup& getWorkgroup(); + const Constants& getSpecializationConstants(); + const Constants& getPushConstants(); + const std::vector>& getTensors(); + + void destroy(); + private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; + std::vector> mTensors; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; @@ -1184,7 +1192,7 @@ private: void createPipeline(); // Parameters - void createParameters(const std::vector>& tensorParams); + void createParameters(); }; } // End namespace kp @@ -1270,6 +1278,10 @@ class Sequence: public std::enable_shared_from_this */ ~Sequence(); + /** + */ + std::shared_ptr record(std::shared_ptr op); + /** * Record function for operation to be added to the GPU queue in batch. This * template requires classes to be derived from the OpBase class. This @@ -1280,7 +1292,146 @@ class Sequence: public std::enable_shared_from_this * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. */ - std::shared_ptr record(std::shared_ptr op); + template + std::shared_ptr + record(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + return this->record(op); + } + template + std::shared_ptr + record(std::shared_ptr algorithm, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) 
template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->record(op); + } + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr eval(); + + std::shared_ptr eval(std::shared_ptr op); + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr + eval(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + return this->eval(op); + } + // Needed as otherwise it's not possible to use initializer lists + template + std::shared_ptr + eval(std::shared_ptr algorithm, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->eval(op); + } + + /** + * Eval Async sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. EvalAwait() must + * be called after to ensure the sequence is terminated correctly. + * + * @return shared_ptr of the Sequence class itself 
+ */ + std::shared_ptr evalAsync(); + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr + evalAsync(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + return this->evalAsync(op); + } + // Needed as otherwise it's not possible to use initializer lists + template + std::shared_ptr + evalAsync(std::shared_ptr algorithm, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->evalAsync(op); + } + + /** + * Eval Await waits for the fence to finish processing and then once it + * finishes, it runs the postEval of all operations. + * + * @param waitFor Number of milliseconds to wait before timing out. + * @return Boolean stating whether execution was successful. + */ + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** * Clear function clears all operations currently recorded and starts recording again. @@ -1303,32 +1454,6 @@ class Sequence: public std::enable_shared_from_this */ void end(); - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @return Boolean stating whether execution was successful. 
- */ - std::shared_ptr eval(); - - /** - * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. EvalAwait() must - * be called after to ensure the sequence is terminated correctly. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAsync(); - - /** - * Eval Await waits for the fence to finish processing and then once it - * finishes, it runs the postEval of all operations. - * - * @param waitFor Number of milliseconds to wait before timing out. - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); - /** * Returns true if the sequence is currently in recording activated. * @@ -1336,6 +1461,8 @@ class Sequence: public std::enable_shared_from_this */ bool isRecording(); + bool isInit(); + /** * Returns true if the sequence is currently running - mostly used for async * workloads. @@ -1348,7 +1475,7 @@ class Sequence: public std::enable_shared_from_this * Destroys and frees the GPU resources which include the buffer and memory * and sets the sequence as init=False. */ - void freeMemoryDestroyGPUResources(); + void destroy(); private: // -------------- NEVER OWNED RESOURCES @@ -1444,6 +1571,8 @@ class Manager * they would like to create the resources on. * * @param physicalDeviceIndex The index of the physical device to use + * @param manageResources (Optional) Whether to manage the memory of the + * resources created and destroy when the manager is destroyed. * @param familyQueueIndices (Optional) List of queue indices to add for * explicit allocation * @param totalQueues The total number of compute queues to create. 
@@ -1462,8 +1591,7 @@ class Manager */ Manager(std::shared_ptr instance, std::shared_ptr physicalDevice, - std::shared_ptr device, - uint32_t physicalDeviceIndex); + std::shared_ptr device); /** * Manager destructor which would ensure all owned resources are destroyed @@ -1506,12 +1634,14 @@ class Manager const Constants& specializationConstants = {}, const Constants& pushConstants = {}); + void destroy(); + void clear(); + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; bool mFreeInstance = false; std::shared_ptr mPhysicalDevice = nullptr; - uint32_t mPhysicalDeviceIndex = -1; std::shared_ptr mDevice = nullptr; bool mFreeDevice = false; @@ -1523,7 +1653,7 @@ class Manager std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; - uint32_t mCurrentSequenceIndex = -1; + bool mManageResources = false; #if DEBUG #ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS @@ -1534,7 +1664,7 @@ class Manager // Create functions void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}); + void createDevice(const std::vector& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0); }; } // End namespace kp @@ -1553,8 +1683,7 @@ class OpAlgoDispatch : public OpBase { public: - OpAlgoDispatch(const std::vector>& tensors, - const std::shared_ptr& algorithm); + OpAlgoDispatch(const std::shared_ptr& algorithm); /** * Default destructor, which is in charge of destroying the algorithm @@ -1586,7 +1715,6 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; std::shared_ptr mAlgorithm; }; diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 6ebb08efe..aee9ddd36 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -22,7 +22,7 @@ Algorithm::~Algorithm() { KP_LOG_DEBUG("Kompute Algorithm Destructor started"); - this->freeMemoryDestroyGPUResources(); + this->destroy(); } void @@ -35,23 +35,35 @@ Algorithm::rebuild( { KP_LOG_DEBUG("Kompute Algorithm 
rebuild started"); - this->setWorkgroup(workgroup); + this->mTensors = tensors; this->mSpirv = spirv; this->mSpecializationConstants = specializationConstants; this->mPushConstants = pushConstants; + this->setWorkgroup(workgroup); // Descriptor pool is created first so if available then destroy all before rebuild if (this->mFreeDescriptorPool) { - this->freeMemoryDestroyGPUResources(); + this->destroy(); } - this->createParameters(tensors); + this->createParameters(); this->createShaderModule(); this->createPipeline(); } +bool +Algorithm::isInit() { + return this->mPipeline && + this->mPipelineCache && + this->mPipelineLayout && + this->mDescriptorPool && + this->mDescriptorSet && + this->mDescriptorSetLayout && + this->mShaderModule; +} + void -Algorithm::freeMemoryDestroyGPUResources() { +Algorithm::destroy() { if (!this->mDevice) { KP_LOG_WARN( @@ -68,6 +80,7 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( *this->mPipeline, (vk::Optional)nullptr); + this->mPipeline = nullptr; } if (this->mFreePipelineCache) { @@ -79,6 +92,7 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( *this->mPipelineCache, (vk::Optional)nullptr); + this->mPipelineCache = nullptr; } if (this->mFreePipelineLayout) { @@ -90,6 +104,7 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( *this->mPipelineLayout, (vk::Optional)nullptr); + this->mPipelineLayout = nullptr; } if (this->mFreeShaderModule) { @@ -101,6 +116,7 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( *this->mShaderModule, (vk::Optional)nullptr); + this->mShaderModule = nullptr; } if (this->mFreeDescriptorSet) { @@ -111,6 +127,7 @@ Algorithm::freeMemoryDestroyGPUResources() { } this->mDevice->freeDescriptorSets( *this->mDescriptorPool, 1, this->mDescriptorSet.get()); + this->mDescriptorSet = nullptr; } if (this->mFreeDescriptorSetLayout) { @@ -122,6 +139,7 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( 
*this->mDescriptorSetLayout, (vk::Optional)nullptr); + this->mDescriptorSetLayout = nullptr; } if (this->mFreeDescriptorPool) { @@ -133,18 +151,19 @@ Algorithm::freeMemoryDestroyGPUResources() { this->mDevice->destroy( *this->mDescriptorPool, (vk::Optional)nullptr); + this->mDescriptorPool = nullptr; } } void -Algorithm::createParameters(const std::vector>& tensorParams) +Algorithm::createParameters() { KP_LOG_DEBUG("Kompute Algorithm createParameters started"); std::vector descriptorPoolSizes = { vk::DescriptorPoolSize( vk::DescriptorType::eStorageBuffer, - static_cast(tensorParams.size()) // Descriptor count + static_cast(this->mTensors.size()) // Descriptor count ) }; @@ -161,7 +180,7 @@ Algorithm::createParameters(const std::vector>& tensorPa this->mFreeDescriptorPool = true; std::vector descriptorSetBindings; - for (size_t i = 0; i < tensorParams.size(); i++) { + for (size_t i = 0; i < this->mTensors.size(); i++) { descriptorSetBindings.push_back( vk::DescriptorSetLayoutBinding(i, // Binding index vk::DescriptorType::eStorageBuffer, @@ -193,11 +212,11 @@ Algorithm::createParameters(const std::vector>& tensorPa this->mFreeDescriptorSet = true; KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets"); - for (size_t i = 0; i < tensorParams.size(); i++) { + for (size_t i = 0; i < this->mTensors.size(); i++) { std::vector computeWriteDescriptorSets; vk::DescriptorBufferInfo descriptorBufferInfo = - tensorParams[i]->constructDescriptorBufferInfo(); + this->mTensors[i]->constructDescriptorBufferInfo(); computeWriteDescriptorSets.push_back( vk::WriteDescriptorSet(*this->mDescriptorSet, @@ -377,4 +396,24 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { } } +const Workgroup& +Algorithm::getWorkgroup() { + return this->mWorkgroup; +} + +const Constants& +Algorithm::getSpecializationConstants() { + return this->mSpecializationConstants; +} + +const Constants& +Algorithm::getPushConstants() { + return this->mPushConstants; +} + +const 
std::vector>& +Algorithm::getTensors() { + return this->mTensors; +} + } diff --git a/src/Manager.cpp b/src/Manager.cpp index 04e3a7e8b..833069d9f 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -33,26 +33,33 @@ Manager::Manager() Manager::Manager(uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { - this->mPhysicalDeviceIndex = physicalDeviceIndex; + this->mManageResources = false; this->createInstance(); - this->createDevice(familyQueueIndices); + this->createDevice(familyQueueIndices, physicalDeviceIndex); } Manager::Manager(std::shared_ptr instance, std::shared_ptr physicalDevice, - std::shared_ptr device, - uint32_t physicalDeviceIndex) + std::shared_ptr device) { + this->mManageResources = true; + this->mInstance = instance; this->mPhysicalDevice = physicalDevice; this->mDevice = device; - this->mPhysicalDeviceIndex = physicalDeviceIndex; } Manager::~Manager() { KP_LOG_DEBUG("Kompute Manager Destructor started"); + this->destroy(); +} + +void +Manager::destroy() { + + KP_LOG_DEBUG("Kompute Manager destroy() started"); if (this->mDevice == nullptr) { KP_LOG_ERROR( @@ -60,32 +67,32 @@ Manager::~Manager() return; } - if (this->mManagedSequences.size()) { + if (this->mManageResources && this->mManagedSequences.size()) { KP_LOG_DEBUG("Kompute Manager explicitly running destructor for " "managed sequences"); for (const std::weak_ptr& weakSq : this->mManagedSequences) { if (std::shared_ptr sq = weakSq.lock()) { - sq->freeMemoryDestroyGPUResources(); + sq->destroy(); } } this->mManagedSequences.clear(); } - if (this->mManagedAlgorithms.size()) { + if (this->mManageResources && this->mManagedAlgorithms.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); for (const std::weak_ptr& weakAlgorithm : this->mManagedAlgorithms) { if (std::shared_ptr algorithm = weakAlgorithm.lock()) { - algorithm->freeMemoryDestroyGPUResources(); + algorithm->destroy(); } } this->mManagedAlgorithms.clear(); } - if (this->mManagedTensors.size()) { 
+ if (this->mManageResources && this->mManagedTensors.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors"); for (const std::weak_ptr& weakTensor : this->mManagedTensors) { if (std::shared_ptr tensor = weakTensor.lock()) { - tensor->freeMemoryDestroyGPUResources(); + tensor->destroy(); } } this->mManagedTensors.clear(); @@ -95,6 +102,7 @@ Manager::~Manager() KP_LOG_INFO("Destroying device"); this->mDevice->destroy( (vk::Optional)nullptr); + this->mDevice = nullptr; KP_LOG_DEBUG("Kompute Manager Destroyed Device"); } @@ -109,6 +117,7 @@ Manager::~Manager() if (this->mDebugReportCallback) { this->mInstance->destroyDebugReportCallbackEXT( this->mDebugReportCallback, nullptr, this->mDebugDispatcher); + this->mDebugReportCallback = nullptr; KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback"); } #endif @@ -117,6 +126,7 @@ Manager::~Manager() if (this->mFreeInstance) { this->mInstance->destroy( (vk::Optional)nullptr); + this->mInstance = nullptr; KP_LOG_DEBUG("Kompute Manager Destroyed Instance"); } } @@ -207,7 +217,31 @@ Manager::createInstance() } void -Manager::createDevice(const std::vector& familyQueueIndices) +Manager::clear() { + if (this->mManageResources) { + this->mManagedTensors.erase( + std::remove_if( + begin(this->mManagedTensors), + end(this->mManagedTensors), + [](std::weak_ptr t) {return t.expired();}), + end(this->mManagedTensors)); + this->mManagedAlgorithms.erase( + std::remove_if( + begin(this->mManagedAlgorithms), + end(this->mManagedAlgorithms), + [](std::weak_ptr t) {return t.expired();}), + end(this->mManagedAlgorithms)); + this->mManagedSequences.erase( + std::remove_if( + begin(this->mManagedSequences), + end(this->mManagedSequences), + [](std::weak_ptr t) {return t.expired();}), + end(this->mManagedSequences)); + } +} + +void +Manager::createDevice(const std::vector& familyQueueIndices, uint32_t physicalDeviceIndex) { KP_LOG_DEBUG("Kompute Manager creating Device"); @@ -215,7 +249,7 @@ Manager::createDevice(const std::vector& 
familyQueueIndices) if (this->mInstance == nullptr) { throw std::runtime_error("Kompute Manager instance is null"); } - if (this->mPhysicalDeviceIndex < 0) { + if (physicalDeviceIndex < 0) { throw std::runtime_error( "Kompute Manager physical device index not provided"); } @@ -226,7 +260,7 @@ Manager::createDevice(const std::vector& familyQueueIndices) this->mInstance->enumeratePhysicalDevices(); vk::PhysicalDevice physicalDevice = - physicalDevices[this->mPhysicalDeviceIndex]; + physicalDevices[physicalDeviceIndex]; this->mPhysicalDevice = std::make_shared(physicalDevice); @@ -235,7 +269,7 @@ Manager::createDevice(const std::vector& familyQueueIndices) physicalDevice.getProperties(); KP_LOG_INFO("Using physical device index {} found {}", - this->mPhysicalDeviceIndex, + physicalDeviceIndex, physicalDeviceProperties.deviceName); if (!familyQueueIndices.size()) { @@ -321,7 +355,9 @@ Manager::tensor( std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) }; - this->mManagedTensors.push_back(tensor); + if (this->mManageResources) { + this->mManagedTensors.push_back(tensor); + } return tensor; } @@ -345,7 +381,9 @@ Manager::algorithm( specializationConstants, pushConstants)}; - this->mManagedAlgorithms.push_back(algorithm); + if (this->mManageResources) { + this->mManagedAlgorithms.push_back(algorithm); + } return algorithm; } @@ -362,7 +400,9 @@ Manager::sequence(uint32_t queueIndex) this->mComputeQueues[queueIndex], this->mComputeQueueFamilyIndices[queueIndex]) }; - this->mManagedSequences.push_back(sq); + if (this->mManageResources) { + this->mManagedSequences.push_back(sq); + } return sq; } diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index b4ecdcf57..a20900189 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -4,12 +4,10 @@ namespace kp { -OpAlgoDispatch::OpAlgoDispatch(const std::vector>& tensors, - const std::shared_ptr& algorithm) +OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& 
algorithm) { KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); - this->mTensors = tensors; this->mAlgorithm = algorithm; } @@ -24,7 +22,7 @@ OpAlgoDispatch::record(std::shared_ptr commandBuffer) KP_LOG_DEBUG("Kompute OpAlgoDispatch record called"); // Barrier to ensure the data is finished writing to buffer memory - for (std::shared_ptr tensor : this->mTensors) { + for (const std::shared_ptr& tensor : this->mAlgorithm->getTensors()) { tensor->recordBufferMemoryBarrier( commandBuffer, vk::AccessFlagBits::eHostWrite, diff --git a/src/Sequence.cpp b/src/Sequence.cpp index da8771cc3..52e147aaa 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -61,6 +61,12 @@ Sequence::end() } } +void +Sequence::clear() { + KP_LOG_DEBUG("Kompute Sequence calling clear"); + this->end(); +} + std::shared_ptr Sequence::eval() { @@ -69,6 +75,13 @@ Sequence::eval() return this->evalAsync()->evalAwait(); } +std::shared_ptr +Sequence::eval(std::shared_ptr op) { + this->clear(); + this->record(op); + return this->eval(); +} + std::shared_ptr Sequence::evalAsync() { @@ -138,8 +151,16 @@ Sequence::isRecording() return this->mRecording; } +bool +Sequence::isInit() { + return this->mDevice && + this->mCommandPool && + this->mCommandBuffer && + this->mComputeQueue; +} + void -Sequence::freeMemoryDestroyGPUResources() +Sequence::destroy() { KP_LOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called"); @@ -189,6 +210,16 @@ Sequence::freeMemoryDestroyGPUResources() this->mOperations.clear(); } + if (this->mDevice) { + this->mDevice = nullptr; + } + if (this->mPhysicalDevice) { + this->mPhysicalDevice = nullptr; + } + if (this->mComputeQueue) { + this->mComputeQueue = nullptr; + } + } std::shared_ptr diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 3078acd8a..461e3ca1e 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -76,6 +76,15 @@ Tensor::tensorType() return this->mTensorType; } +bool +Tensor::isInit() { + return this->mDevice && + this->mPrimaryBuffer && + this->mPrimaryMemory && + 
this->mStagingBuffer && + this->mStagingMemory; +} + void Tensor::setData(const std::vector& data) { @@ -429,7 +438,7 @@ Tensor::allocateBindMemory(std::shared_ptr buffer, } void -Tensor::freeMemoryDestroyGPUResources() +Tensor::destroy() { KP_LOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources()"); @@ -495,6 +504,10 @@ Tensor::freeMemoryDestroyGPUResources() } } + if (this->mDevice) { + this->mDevice = nullptr; + } + KP_LOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources()"); } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 4016c5efb..8b37f3e9a 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -45,10 +45,6 @@ public: const Constants& specializationConstants = {}, const Constants& pushConstants = {}); - bool isInit(); - - void freeMemoryDestroyGPUResources(); - /** * Destructor for Algorithm which is responsible for freeing and desroying * respective pipelines and owned parameter groups. @@ -65,11 +61,21 @@ public: */ void recordDispatch(std::shared_ptr commandBuffer); + bool isInit(); + void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); + const Workgroup& getWorkgroup(); + const Constants& getSpecializationConstants(); + const Constants& getPushConstants(); + const std::vector>& getTensors(); + + void destroy(); + private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; + std::vector> mTensors; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; @@ -100,7 +106,7 @@ private: void createPipeline(); // Parameters - void createParameters(const std::vector>& tensorParams); + void createParameters(); }; } // End namespace kp diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index cd808952a..0e97f12fa 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -30,6 +30,8 @@ class Manager * they would like to create the resources on. 
* * @param physicalDeviceIndex The index of the physical device to use + * @param manageResources (Optional) Whether to manage the memory of the + * resources created and destroy when the manager is destroyed. * @param familyQueueIndices (Optional) List of queue indices to add for * explicit allocation * @param totalQueues The total number of compute queues to create. @@ -48,8 +50,7 @@ class Manager */ Manager(std::shared_ptr instance, std::shared_ptr physicalDevice, - std::shared_ptr device, - uint32_t physicalDeviceIndex); + std::shared_ptr device); /** * Manager destructor which would ensure all owned resources are destroyed @@ -92,12 +93,14 @@ class Manager const Constants& specializationConstants = {}, const Constants& pushConstants = {}); + void destroy(); + void clear(); + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; bool mFreeInstance = false; std::shared_ptr mPhysicalDevice = nullptr; - uint32_t mPhysicalDeviceIndex = -1; std::shared_ptr mDevice = nullptr; bool mFreeDevice = false; @@ -109,7 +112,7 @@ class Manager std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; - uint32_t mCurrentSequenceIndex = -1; + bool mManageResources = false; #if DEBUG #ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS @@ -120,7 +123,7 @@ class Manager // Create functions void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}); + void createDevice(const std::vector& familyQueueIndices = {}, uint32_t physicalDeviceIndex = 0); }; } // End namespace kp diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index 47827d729..e3bac936e 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -31,6 +31,10 @@ class Sequence: public std::enable_shared_from_this */ ~Sequence(); + /** + */ + std::shared_ptr record(std::shared_ptr op); + /** * Record function for operation to be added to the GPU queue in batch. 
This * template requires classes to be derived from the OpBase class. This @@ -41,7 +45,148 @@ class Sequence: public std::enable_shared_from_this * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. */ - std::shared_ptr record(std::shared_ptr op); + template + std::shared_ptr + record(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + return this->record(op); + } + template + std::shared_ptr + record(std::shared_ptr algorithm, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->record(op); + } + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + std::shared_ptr eval(); + + std::shared_ptr eval(std::shared_ptr op); + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + // TODO: Aim to have only a single function with tensors/algorithm + template + std::shared_ptr + eval(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) 
template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + // TODO: Aim to be able to handle errors when returning without throw except + return this->eval(op); + } + // Needded as otherise can't use initialiser list + template + std::shared_ptr + eval(std::shared_ptr algorithm, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->eval(op); + } + + /** + * Eval Async sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. EvalAwait() must + * be called after to ensure the sequence is terminated correctly. + * + * @return Boolean stating whether execution was successful. + */ + std::shared_ptr evalAsync(); + + /** + * Eval sends all the recorded and stored operations in the vector of + * operations into the gpu as a submit job with a barrier. + * + * @return shared_ptr of the Sequence class itself + */ + template + std::shared_ptr + evalAsync(std::vector> tensors, TArgs&&... params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(tensors, std::forward(params)...) }; + + return this->evalAsync(op); + } + // Needed as otherwise it's not possible to use initializer lists + template + std::shared_ptr + evalAsync(std::shared_ptr algorithm, TArgs&&... 
params) + { + KP_LOG_DEBUG("Kompute Sequence record function started"); + + static_assert(std::is_base_of::value, + "Kompute Sequence record(...) template only valid with " + "OpBase derived classes"); + + KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); + std::shared_ptr op{ + new T(algorithm, std::forward(params)...) }; + + return this->evalAsync(op); + } + + /** + * Eval Await waits for the fence to finish processing and then once it + * finishes, it runs the postEval of all operations. + * + * @param waitFor Number of milliseconds to wait before timing out. + * @return Boolean stating whether execution was successful. + */ + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** * Clear function clears all operations currently recorded and starts recording again. @@ -64,32 +209,6 @@ class Sequence: public std::enable_shared_from_this */ void end(); - /** - * Eval sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr eval(); - - /** - * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job with a barrier. EvalAwait() must - * be called after to ensure the sequence is terminated correctly. - * - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAsync(); - - /** - * Eval Await waits for the fence to finish processing and then once it - * finishes, it runs the postEval of all operations. - * - * @param waitFor Number of milliseconds to wait before timing out. - * @return Boolean stating whether execution was successful. - */ - std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); - /** * Returns true if the sequence is currently in recording activated. 
* @@ -97,6 +216,9 @@ class Sequence: public std::enable_shared_from_this */ bool isRecording(); + + bool isInit(); + /** * Returns true if the sequence is currently running - mostly used for async * workloads. @@ -109,7 +231,7 @@ class Sequence: public std::enable_shared_from_this * Destroys and frees the GPU resources which include the buffer and memory * and sets the sequence as init=False. */ - void freeMemoryDestroyGPUResources(); + void destroy(); private: // -------------- NEVER OWNED RESOURCES diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 40adcc700..c3521b7a8 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -59,7 +59,9 @@ class Tensor /** * Destroys and frees the GPU resources which include the buffer and memory. */ - void freeMemoryDestroyGPUResources(); + void destroy(); + + bool isInit(); /** * Returns the vector of data currently contained by the Tensor. It is diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index 0af5b5fba..1b5ab1bf0 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -17,8 +17,7 @@ class OpAlgoDispatch : public OpBase { public: - OpAlgoDispatch(const std::vector>& tensors, - const std::shared_ptr& algorithm); + OpAlgoDispatch(const std::shared_ptr& algorithm); /** * Default destructor, which is in charge of destroying the algorithm @@ -50,7 +49,6 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; std::shared_ptr mAlgorithm; }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3102ec648..a30792aad 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -11,8 +11,7 @@ else() endif() file(GLOB test_kompute_CPP - "${CMAKE_CURRENT_SOURCE_DIR}/TestMain.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TestWorkgroup.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" ) 
add_executable(test_kompute ${test_kompute_CPP}) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 42efdff9b..6e5ba8adf 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -37,25 +37,32 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) } )"); + std::vector spirv = kp::Shader::compile_source(shader); + std::vector data(size, 0.0); std::vector resultSync(size, 100000000); std::vector resultAsync(size, 100000000); kp::Manager mgr; + std::shared_ptr sq = mgr.sequence(); + std::vector> inputsSyncB; + std::vector> algorithms; for (uint32_t i = 0; i < numParallel; i++) { - inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); + inputsSyncB.push_back(mgr.tensor(data)); + algorithms.push_back(mgr.algorithm({ inputsSyncB[i] }, spirv)); } - mgr.rebuild(inputsSyncB); + sq->eval(inputsSyncB); + + mgr.sequence()->eval(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < numParallel; i++) { - mgr.evalOpDefault( - { inputsSyncB[i] }, kp::Shader::compile_source(shader)); + sq->eval(algorithms[i]); } auto endSync = std::chrono::high_resolution_clock::now(); @@ -63,7 +70,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) std::chrono::duration_cast(endSync - startSync) .count(); - mgr.evalOpDefault(inputsSyncB); + sq->eval(inputsSyncB); for (uint32_t i = 0; i < numParallel; i++) { EXPECT_EQ(inputsSyncB[i]->data(), resultSync); @@ -74,26 +81,23 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) std::vector> inputsAsyncB; for (uint32_t i = 0; i < numParallel; i++) { - inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); + inputsAsyncB.push_back(mgr.tensor(data)); } - mgrAsync.rebuild(inputsAsyncB); + std::vector> sqs; for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.sequence("async" + std::to_string(i), i); + sqs.push_back(mgrAsync.sequence(i)); } auto startAsync = std::chrono::high_resolution_clock::now(); for (uint32_t i = 0; i < 
numParallel; i++) { - mgrAsync.evalOpAsync( - { inputsAsyncB[i] }, - "async" + std::to_string(i), - kp::Shader::compile_source(shader)); + sqs[i]->evalAsync(algorithms[i]); } for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.evalOpAwait("async" + std::to_string(i)); + sqs[i]->evalAwait(); } auto endAsync = std::chrono::high_resolution_clock::now(); @@ -101,7 +105,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) endAsync - startAsync) .count(); - mgrAsync.evalOpDefault({ inputsAsyncB }); + sq->eval({ inputsAsyncB }); for (uint32_t i = 0; i < numParallel; i++) { EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync); @@ -138,32 +142,32 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) } )"); + std::vector spirv = kp::Shader::compile_source(shader); + std::vector data(size, 0.0); std::vector resultAsync(size, 100000000); kp::Manager mgr; - std::shared_ptr tensorA{ new kp::Tensor(data) }; - std::shared_ptr tensorB{ new kp::Tensor(data) }; + std::shared_ptr tensorA = mgr.tensor(data); + std::shared_ptr tensorB = mgr.tensor(data); - mgr.sequence("asyncOne"); - mgr.sequence("asyncTwo"); + std::shared_ptr sq1 = mgr.sequence(); + std::shared_ptr sq2 = mgr.sequence(); - mgr.rebuild({ tensorA, tensorB }); + sq1->eval({ tensorA, tensorB }); - std::vector result = kp::Shader::compile_source(shader); + std::shared_ptr algo1 = mgr.algorithm({tensorA}); + std::shared_ptr algo2 = mgr.algorithm({tensorB}); - mgr.evalOpAsync( - { tensorA }, "asyncOne", kp::Shader::compile_source(shader)); + sq1->evalAsync(algo1); + sq2->evalAsync(algo2); - mgr.evalOpAsync( - { tensorB }, "asyncTwo", kp::Shader::compile_source(shader)); + sq1->evalAwait(); + sq2->evalAwait(); - mgr.evalOpAwait("asyncOne"); - mgr.evalOpAwait("asyncTwo"); - - mgr.evalOpAsyncDefault({ tensorA, tensorB }); - mgr.evalOpAwaitDefault(); + sq1->evalAsync({ tensorA, tensorB }); + sq1->evalAwait(); EXPECT_EQ(tensorA->data(), resultAsync); EXPECT_EQ(tensorB->data(), resultAsync); diff --git 
a/test/TestDestroy.cpp b/test/TestDestroy.cpp index 940fdf722..43b7c8e1a 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -5,7 +5,7 @@ TEST(TestDestroy, TestDestroyTensorSingle) { - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; + std::shared_ptr tensorA = nullptr; std::string shader(R"( #version 450 @@ -16,37 +16,36 @@ TEST(TestDestroy, TestDestroyTensorSingle) pa[index] = pa[index] + 1; })"); + std::vector spirv = kp::Shader::compile_source(shader); + { std::shared_ptr sq = nullptr; { kp::Manager mgr; - mgr.rebuild({ tensorA }); + tensorA = mgr.tensor({ 0, 0, 0 }); - sq = mgr.sequence(); + std::shared_ptr algo = + mgr.algorithm({ tensorA }, spirv); - sq->begin(); - sq->record( - { tensorA }, kp::Shader::compile_source(shader)); - sq->end(); - - sq->eval(); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy(tensorA); + mgr.sequence() + ->record(algo) + ->eval() + ->eval(algo->getTensors()); + tensorA->destroy(); EXPECT_FALSE(tensorA->isInit()); } + EXPECT_FALSE(tensorA->isInit()); } EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); } TEST(TestDestroy, TestDestroyTensorVector) { - std::shared_ptr tensorA{ new kp::Tensor({ 1, 1, 1 }) }; - std::shared_ptr tensorB{ new kp::Tensor({ 1, 1, 1 }) }; + std::shared_ptr tensorA = nullptr; + std::shared_ptr tensorB = nullptr; std::string shader(R"( #version 450 @@ -58,6 +57,7 @@ TEST(TestDestroy, TestDestroyTensorVector) pa[index] = pa[index] + 1; pb[index] = pb[index] + 2; })"); + std::vector spirv = kp::Shader::compile_source(shader); { std::shared_ptr sq = nullptr; @@ -65,20 +65,20 @@ TEST(TestDestroy, TestDestroyTensorVector) { kp::Manager mgr; - mgr.rebuild({ tensorA, tensorB }); + tensorA = mgr.tensor({ 1, 1, 1 }); + tensorB = mgr.tensor({ 1, 1, 1 }); - sq = mgr.sequence(); + std::shared_ptr algo = + mgr.algorithm({tensorA, tensorB}, spirv); - sq->begin(); - sq->record( - { tensorA, tensorB }, kp::Shader::compile_source(shader)); - sq->end(); + mgr.sequence() + ->record(algo->getTensors()) + 
->record(algo) + ->record(algo->getTensors()) + ->eval(); - sq->eval(); - - mgr.evalOpDefault({ tensorA, tensorB }); - - mgr.destroy({ tensorA, tensorB }); + tensorA->destroy(); + tensorB->destroy(); EXPECT_FALSE(tensorA->isInit()); EXPECT_FALSE(tensorB->isInit()); @@ -88,32 +88,9 @@ TEST(TestDestroy, TestDestroyTensorVector) EXPECT_EQ(tensorB->data(), std::vector({ 3, 3, 3 })); } -TEST(TestDestroy, TestDestroyTensorVectorUninitialised) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 1, 1, 1 }) }; - std::shared_ptr tensorB{ new kp::Tensor({ 1, 1, 1 }) }; - - { - std::shared_ptr sq = nullptr; - - { - kp::Manager mgr; - - mgr.rebuild({ tensorA, tensorB }); - - mgr.destroy({ tensorA, tensorB }); - - EXPECT_FALSE(tensorA->isInit()); - EXPECT_FALSE(tensorB->isInit()); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); -} - TEST(TestDestroy, TestDestroySequenceSingle) { - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; + std::shared_ptr tensorA = nullptr; std::string shader(R"( #version 450 @@ -124,26 +101,21 @@ TEST(TestDestroy, TestDestroySequenceSingle) pa[index] = pa[index] + 1; })"); + std::vector spirv = kp::Shader::compile_source(shader); + { std::shared_ptr sq = nullptr; { kp::Manager mgr; - mgr.rebuild({ tensorA }); + tensorA = mgr.tensor({0, 0, 0}); - sq = mgr.sequence(); - - sq->begin(); - sq->record( - { tensorA }, kp::Shader::compile_source(shader)); - sq->end(); - - sq->eval(); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy(sq); + mgr.sequence() + ->record({tensorA}) + ->record(mgr.algorithm({tensorA}, spirv)) + ->record({tensorA}) + ->eval(); EXPECT_FALSE(sq->isInit()); } @@ -151,220 +123,3 @@ TEST(TestDestroy, TestDestroySequenceSingle) EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); } -TEST(TestDestroy, TestDestroySequenceVector) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - 
layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - { - std::shared_ptr sq1 = nullptr; - std::shared_ptr sq2 = nullptr; - - { - kp::Manager mgr; - - mgr.rebuild({ tensorA }); - - sq1 = mgr.sequence("One"); - sq1->begin(); - sq1->record( - { tensorA }, kp::Shader::compile_source(shader)); - sq1->end(); - sq1->eval(); - - sq2 = mgr.sequence("Two"); - sq2->begin(); - sq2->record( - { tensorA }, kp::Shader::compile_source(shader)); - sq2->end(); - sq2->eval(); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy({ sq1, sq2 }); - - EXPECT_FALSE(sq1->isInit()); - EXPECT_FALSE(sq2->isInit()); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 2, 2, 2 })); -} - -TEST(TestDestroy, TestDestroySequenceNameSingleInsideManager) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - { - kp::Manager mgr; - { - mgr.rebuild({ tensorA }); - - mgr.evalOp( - { tensorA }, "one", - kp::Shader::compile_source(shader)); - - mgr.evalOp( - { tensorA }, "two", - kp::Shader::compile_source(shader)); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy("one"); - mgr.destroy("two"); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 2, 2, 2 })); -} - -TEST(TestDestroy, TestDestroySequenceNameSingleOutsideManager) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - { - std::shared_ptr sq1 = nullptr; - - { - kp::Manager mgr; - - mgr.rebuild({ tensorA }); - - sq1 = mgr.sequence("One"); - sq1->begin(); - sq1->record( - { tensorA }, 
kp::Shader::compile_source(shader)); - sq1->end(); - sq1->eval(); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy("One"); - - EXPECT_FALSE(sq1->isInit()); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); -} - -TEST(TestDestroy, TestDestroySequenceNameVectorInsideManager) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - { - kp::Manager mgr; - { - mgr.rebuild({ tensorA }); - - mgr.evalOp( - { tensorA }, "one", - kp::Shader::compile_source(shader)); - - mgr.evalOp( - { tensorA }, "two", - kp::Shader::compile_source(shader)); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy(std::vector({"one", "two"})); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 2, 2, 2 })); -} - -TEST(TestDestroy, TestDestroySequenceNameVectorOutsideManager) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); - - { - kp::Manager mgr; - { - mgr.rebuild({ tensorA }); - - mgr.evalOp( - { tensorA }, "one", - kp::Shader::compile_source(shader)); - - mgr.evalOp( - { tensorA }, "two", - kp::Shader::compile_source(shader)); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy(std::vector({"one", "two"})); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 2, 2, 2 })); -} - -TEST(TestDestroy, TestDestroySequenceNameDefaultOutsideManager) -{ - std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; - - std::string shader(R"( - #version 450 - layout (local_size_x = 1) in; - layout(set = 0, binding = 0) buffer a { float pa[]; }; - void main() { - uint index = gl_GlobalInvocationID.x; - pa[index] = pa[index] + 1; - })"); 
- - { - kp::Manager mgr; - { - mgr.rebuild({ tensorA }); - - mgr.evalOpDefault( - { tensorA }, - kp::Shader::compile_source(shader)); - - mgr.evalOpDefault({ tensorA }); - - mgr.destroy(KP_DEFAULT_SESSION); - } - } - EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); -} diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 00425ddc0..16c08afb9 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -11,47 +11,40 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) uint32_t ITERATIONS = 100; float learningRate = 0.1; - std::shared_ptr xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) }; - std::shared_ptr xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) }; - - std::shared_ptr y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) }; - - std::shared_ptr wIn{ new kp::Tensor({ 0.001, 0.001 }) }; - std::shared_ptr wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - std::shared_ptr wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::shared_ptr bIn{ new kp::Tensor({ 0 }) }; - std::shared_ptr bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::shared_ptr lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; - { kp::Manager mgr; - mgr.rebuild(params); + std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr sq = mgr.sequence(); + std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); - // Record op algo base - sq->begin(); + std::shared_ptr wIn = mgr.tensor({ 0.001, 0.001 }); + std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - sq->record({ wIn, bIn }); + std::shared_ptr bIn = mgr.tensor({ 0 }); + std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - sq->record( - params, - std::vector( + std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + + std::vector> params = { xI, xJ, y, + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; + + std::vector spirv = 
std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + - kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)), - kp::Workgroup(), kp::Constants({5.0})); + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - sq->record({ wOutI, wOutJ, bOut, lOut }); + std::shared_ptr algorithm = + mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0})); - sq->end(); + std::shared_ptr sq = + mgr.sequence() + ->record({ wIn, bIn }) + ->record(algorithm) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -64,21 +57,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) bIn->data()[0] -= learningRate * bOut->data()[j]; } } + + // Based on the inputs the outputs should be at least: + // * wi < 0.01 + // * wj > 1.0 + // * b < 0 + // TODO: Add EXPECT_DOUBLE_EQ instead + EXPECT_LT(wIn->data()[0], 0.01); + EXPECT_GT(wIn->data()[1], 1.0); + EXPECT_LT(bIn->data()[0], 0.0); + + KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", + wIn->data()[0], + wIn->data()[1], + bIn->data()[0]); } - - // Based on the inputs the outputs should be at least: - // * wi < 0.01 - // * wj > 1.0 - // * b < 0 - // TODO: Add EXPECT_DOUBLE_EQ instead - EXPECT_LT(wIn->data()[0], 0.01); - EXPECT_GT(wIn->data()[1], 1.0); - EXPECT_LT(bIn->data()[0], 0.0); - - KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", - wIn->data()[0], - wIn->data()[1], - bIn->data()[0]); } TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) @@ -87,50 +80,43 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) uint32_t ITERATIONS = 100; float learningRate = 0.1; - kp::Constants wInVec = { 0.001, 0.001 }; - std::vector bInVec = { 0 }; - - std::shared_ptr xI{ new kp::Tensor({ 0, 1, 1, 1, 1 }) }; - std::shared_ptr xJ{ new kp::Tensor({ 0, 0, 0, 1, 1 }) }; - - std::shared_ptr y{ new kp::Tensor({ 0, 
0, 0, 1, 1 }) }; - - std::shared_ptr wIn{ new kp::Tensor( - wInVec, kp::Tensor::TensorTypes::eHost) }; - std::shared_ptr wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - std::shared_ptr wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::shared_ptr bIn{ new kp::Tensor( - bInVec, kp::Tensor::TensorTypes::eHost) }; - std::shared_ptr bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::shared_ptr lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) }; - - std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; - { kp::Manager mgr; - mgr.rebuild(params); + std::shared_ptr xI = mgr.tensor({ 0, 1, 1, 1, 1 }); + std::shared_ptr xJ = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr sq = mgr.sequence(); + std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); - // Record op algo base - sq->begin(); + std::shared_ptr wIn = mgr.tensor( + { 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); + std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - sq->record( - params, - std::vector( + std::shared_ptr bIn = mgr.tensor( + { 0 }, + kp::Tensor::TensorTypes::eHost); + std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + + std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + + std::vector> params = { xI, xJ, y, + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; + + std::vector spirv = std::vector( (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + - kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)), - kp::Workgroup(), kp::Constants({5.0})); + kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); - sq->record({ wOutI, wOutJ, bOut, lOut }); + std::shared_ptr algorithm = + mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0})); - sq->end(); + std::shared_ptr sq = + mgr.sequence() + ->record({ wIn, bIn }) + ->record(algorithm) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected 
iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -145,7 +131,6 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) wIn->mapDataIntoHostMemory(); bIn->mapDataIntoHostMemory(); } - } // Based on the inputs the outputs should be at least: // * wi < 0.01 @@ -160,4 +145,5 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) wIn->data()[0], wIn->data()[1], bIn->data()[0]); + } } diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 73cbdce61..59ab1ff10 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -3,9 +3,6 @@ #include "kompute/Kompute.hpp" -#include "kompute_test/shaders/shadertest_workgroup.hpp" - - TEST(TestWorkgroup, TestSimpleWorkgroup) { std::shared_ptr tensorA = nullptr; @@ -31,9 +28,9 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) std::shared_ptr algorithm = mgr.algorithm(params, spirv, workgroup); sq = mgr.sequence(); - sq->record(std::make_shared(params)); - sq->record(std::make_shared(params, algorithm)); - sq->record(std::make_shared(params)); + sq->record(params); + sq->record(params, algorithm); + sq->record(params); sq->eval(); } }