diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index a002e0d67..295663432 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -9,7 +9,8 @@ Algorithm::Algorithm() SPDLOG_DEBUG("Kompute Algorithm base constructor"); } -Algorithm::Algorithm(std::shared_ptr device, std::shared_ptr commandBuffer) +Algorithm::Algorithm(std::shared_ptr device, + std::shared_ptr commandBuffer) { SPDLOG_DEBUG("Kompute Algorithm Constructor with device"); @@ -28,8 +29,10 @@ Algorithm::~Algorithm() } } -void Algorithm::init(std::string shaderFilePath, - std::vector> tensorParams) { +void +Algorithm::init(std::string shaderFilePath, + std::vector> tensorParams) +{ SPDLOG_DEBUG("Kompute Algorithm init started"); // TODO: Move to util function @@ -38,77 +41,79 @@ void Algorithm::init(std::string shaderFilePath, this->createPipeline(); } -void Algorithm::createParameters(std::vector>& tensorParams) { +void +Algorithm::createParameters(std::vector>& tensorParams) +{ SPDLOG_DEBUG("Kompute Algorithm createParameters started"); // TODO: Explore design for having multiple descriptor pool sizes std::vector descriptorPoolSizes = { - vk::DescriptorPoolSize( - vk::DescriptorType::eStorageBuffer, - 1 // Descriptor count - ) - }; + vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, + 1 // Descriptor count + ) + }; // TODO: Explore design for having more than 1 set configurable vk::DescriptorPoolCreateInfo descriptorPoolInfo( - vk::DescriptorPoolCreateFlags(), - 1, // Max sets - static_cast(descriptorPoolSizes.size()), - descriptorPoolSizes.data()); + vk::DescriptorPoolCreateFlags(), + 1, // Max sets + static_cast(descriptorPoolSizes.size()), + descriptorPoolSizes.data()); SPDLOG_DEBUG("Kompute Algorithm creating descriptor pool"); this->mDescriptorPool = std::make_shared(); - this->mDevice->createDescriptorPool(&descriptorPoolInfo, nullptr, this->mDescriptorPool.get()); + this->mDevice->createDescriptorPool( + &descriptorPoolInfo, nullptr, this->mDescriptorPool.get()); std::vector descriptorSetBindings; - // TODO: Explore allowing descriptor set bind index to be configurable by user to specify which tensors woudl go on each binding + // TODO: Explore allowing descriptor set bind index to be configurable by + // user to specify which tensors woudl go on each binding for (size_t i = 0; i < tensorParams.size(); i++) { descriptorSetBindings.push_back( - vk::DescriptorSetLayoutBinding( - i, // Binding index - vk::DescriptorType::eStorageBuffer, - 1, // Descriptor count - vk::ShaderStageFlagBits::eCompute) - ); + vk::DescriptorSetLayoutBinding(i, // Binding index + vk::DescriptorType::eStorageBuffer, + 1, // Descriptor count + vk::ShaderStageFlagBits::eCompute)); } // This is the component that is fed into the pipeline vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo( - vk::DescriptorSetLayoutCreateFlags(), - static_cast(descriptorSetBindings.size()), - descriptorSetBindings.data() - ); + vk::DescriptorSetLayoutCreateFlags(), + static_cast(descriptorSetBindings.size()), + descriptorSetBindings.data()); SPDLOG_DEBUG("Kompute Algorithm creating descriptor set layout"); - // TODO: We createa signle descriptor set layout which would have to be extended if multiple set layouts to be supported + // TODO: We createa signle descriptor set layout which would have to be + // extended if multiple set layouts to be supported this->mDescriptorSetLayout = std::make_shared(); - this->mDevice->createDescriptorSetLayout(&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get()); + this->mDevice->createDescriptorSetLayout( + &descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get()); vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo( - *this->mDescriptorPool, - 1, // Descriptor set layout count - this->mDescriptorSetLayout.get()); + *this->mDescriptorPool, + 1, // Descriptor set layout count + this->mDescriptorSetLayout.get()); SPDLOG_DEBUG("Kompute Algorithm allocating descriptor sets"); this->mDescriptorSet = std::make_shared(); - this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); + this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, + this->mDescriptorSet.get()); std::vector computeWriteDescriptorSets; for (size_t i = 0; i < tensorParams.size(); i++) { vk::DescriptorBufferInfo descriptorBufferInfo = - tensorParams[i]->constructDescriptorBufferInfo(); + tensorParams[i]->constructDescriptorBufferInfo(); // TODO: Explore design exposing the destination array element computeWriteDescriptorSets.push_back( - vk::WriteDescriptorSet( - *this->mDescriptorSet, - i, // Destination binding - 0, // Destination array element - 1, // Descriptor count - vk::DescriptorType::eStorageBuffer, - nullptr, // Descriptor image info - &descriptorBufferInfo)); + vk::WriteDescriptorSet(*this->mDescriptorSet, + i, // Destination binding + 0, // Destination array element + 1, // Descriptor count + vk::DescriptorType::eStorageBuffer, + nullptr, // Descriptor image info + &descriptorBufferInfo)); } SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets"); @@ -117,11 +122,13 @@ void Algorithm::createParameters(std::vector>& tensorPar SPDLOG_DEBUG("Kompue Algorithm successfully run init"); } -void Algorithm::createShaderModule(std::string shaderFilePath) { +void +Algorithm::createShaderModule(std::string shaderFilePath) +{ SPDLOG_DEBUG("Kompute Algorithm createShaderModule started"); - std::ifstream fileStream( - shaderFilePath, std::ios::binary | std::ios::in | std::ios::ate); + std::ifstream fileStream(shaderFilePath, + std::ios::binary | std::ios::in | std::ios::ate); size_t shaderFileSize = fileStream.tellg(); fileStream.seekg(0, std::ios::beg); @@ -130,64 +137,79 @@ void Algorithm::createShaderModule(std::string shaderFilePath) { fileStream.close(); vk::ShaderModuleCreateInfo shaderModuleInfo( - vk::ShaderModuleCreateFlags(), - shaderFileSize, - (uint32_t*)shaderFileData); + vk::ShaderModuleCreateFlags(), shaderFileSize, (uint32_t*)shaderFileData); - SPDLOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}", shaderFileSize); + SPDLOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}", + shaderFileSize); this->mFreeShaderModule = true; this->mShaderModule = std::make_shared(); this->mDevice->createShaderModule( - &shaderModuleInfo, - nullptr, - this->mShaderModule.get()); + &shaderModuleInfo, nullptr, this->mShaderModule.get()); SPDLOG_DEBUG("Kompute Algorithm create shader module success"); } -void Algorithm::createPipeline() { +void +Algorithm::createPipeline() +{ SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline"); // TODO: Explore design for supporting multiple sets vk::PipelineLayoutCreateInfo pipelineLayoutInfo( - vk::PipelineLayoutCreateFlags(), - 1, // Set layout count - this->mDescriptorSetLayout.get()); + vk::PipelineLayoutCreateFlags(), + 1, // Set layout count + this->mDescriptorSetLayout.get()); this->mPipelineLayout = std::make_shared(); - this->mDevice->createPipelineLayout(&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get()); + this->mDevice->createPipelineLayout( + &pipelineLayoutInfo, nullptr, this->mPipelineLayout.get()); - vk::PipelineShaderStageCreateInfo shaderStage(vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, *this->mShaderModule, "main", nullptr); + vk::PipelineShaderStageCreateInfo shaderStage( + vk::PipelineShaderStageCreateFlags(), + vk::ShaderStageFlagBits::eCompute, + *this->mShaderModule, + "main", + nullptr); - vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, vk::Pipeline(), 0); + vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), + shaderStage, + *this->mPipelineLayout, + vk::Pipeline(), + 0); // TODO: Confirm what the best structure is with pipeline cache this->mFreePipelineCache = true; - vk::PipelineCacheCreateInfo pipelineCacheInfo = vk::PipelineCacheCreateInfo(); + vk::PipelineCacheCreateInfo pipelineCacheInfo = + vk::PipelineCacheCreateInfo(); this->mPipelineCache = std::make_shared(); - this->mDevice->createPipelineCache(&pipelineCacheInfo, nullptr, this->mPipelineCache.get()); + this->mDevice->createPipelineCache( + &pipelineCacheInfo, nullptr, this->mPipelineCache.get()); - vk::ResultValue pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); + vk::ResultValue pipelineResult = + this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { - throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result)); + throw std::runtime_error("Failed to create pipeline result: " + + vk::to_string(pipelineResult.result)); } this->mFreePipeline = true; this->mPipeline = std::make_shared(pipelineResult.value); } -void Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) { +void +Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) +{ SPDLOG_DEBUG("Kompute Algorithm calling record dispatch"); - this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline); + this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, + *this->mPipeline); - this->mCommandBuffer->bindDescriptorSets( - vk::PipelineBindPoint::eCompute, - *this->mPipelineLayout, - 0, // First set - *this->mDescriptorSet, - nullptr // Dispatcher + this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, + *this->mPipelineLayout, + 0, // First set + *this->mDescriptorSet, + nullptr // Dispatcher ); this->mCommandBuffer->dispatch(x, y, z); diff --git a/src/Algorithm.hpp b/src/Algorithm.hpp index 7888a33b3..75a1d48ae 100644 --- a/src/Algorithm.hpp +++ b/src/Algorithm.hpp @@ -19,7 +19,8 @@ class Algorithm public: Algorithm(); - Algorithm(std::shared_ptr device, std::shared_ptr commandBuffer); + Algorithm(std::shared_ptr device, + std::shared_ptr commandBuffer); // TODO: Add specialisation data // TODO: Explore other ways of passing shader (ie raw bytes) @@ -31,7 +32,7 @@ class Algorithm // Record commands void recordDispatch(uint32_t x, uint32_t y, uint32_t z); -private: + private: // Shared resources std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; diff --git a/src/OpBase.hpp b/src/OpBase.hpp index 97c2dabff..381e8081a 100644 --- a/src/OpBase.hpp +++ b/src/OpBase.hpp @@ -20,9 +20,7 @@ class OpBase { private: public: - OpBase() { - SPDLOG_DEBUG("Compute OpBase base constructor"); - } + OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); } OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -44,11 +42,7 @@ class OpBase virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); } - virtual void postSubmit() - { - SPDLOG_DEBUG("Kompute OpBase init called"); - } - + virtual void postSubmit() { SPDLOG_DEBUG("Kompute OpBase init called"); } protected: std::shared_ptr mPhysicalDevice; diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp index cc82b9dc1..63d32f9ca 100644 --- a/src/OpCreateTensor.cpp +++ b/src/OpCreateTensor.cpp @@ -65,10 +65,10 @@ OpCreateTensor::record() } } -void OpCreateTensor::postSubmit() +void +OpCreateTensor::postSubmit() { SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); - } } diff --git a/src/OpMult.cpp b/src/OpMult.cpp index 18c6d0340..c55b6fac8 100644 --- a/src/OpMult.cpp +++ b/src/OpMult.cpp @@ -46,23 +46,36 @@ OpMult::init(std::vector> tensors) this->mTensorOutput = tensors[2]; // TODO: Explore adding a validate function - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && this->mTensorOutput->isInit())) { - throw std::runtime_error("Kompute OpMult all tensor parameters must be initialised. LHS: " + std::to_string(this->mTensorLHS->isInit()) + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + " Output: " + std::to_string(this->mTensorOutput->isInit())); + if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && + this->mTensorOutput->isInit())) { + throw std::runtime_error( + "Kompute OpMult all tensor parameters must be initialised. LHS: " + + std::to_string(this->mTensorLHS->isInit()) + + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + + " Output: " + std::to_string(this->mTensorOutput->isInit())); } - // TODO: Explore use-cases where tensors shouldn't be the same size, and how to deal with those situations - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error("Kompute OpMult all tensor parameters must be the same size LHS: " + std::to_string(this->mTensorLHS->size()) + " RHS: " + std::to_string(this->mTensorRHS->size()) + " Output: " + std::to_string(this->mTensorOutput->size())); + // TODO: Explore use-cases where tensors shouldn't be the same size, and how + // to deal with those situations + if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && + this->mTensorRHS->size() == this->mTensorOutput->size())) { + throw std::runtime_error( + "Kompute OpMult all tensor parameters must be the same size LHS: " + + std::to_string(this->mTensorLHS->size()) + + " RHS: " + std::to_string(this->mTensorRHS->size()) + + " Output: " + std::to_string(this->mTensorOutput->size())); } this->mTensorOutputStaging = std::make_shared( this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - this->mTensorOutputStaging->init(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, this->mTensorOutput->data()); + this->mTensorOutputStaging->init(this->mPhysicalDevice, + this->mDevice, + this->mCommandBuffer, + this->mTensorOutput->data()); // TODO: Make this path configurable - this->mAlgorithm->init( - "shaders/glsl/opmult.comp.spv", tensors); + this->mAlgorithm->init("shaders/glsl/opmult.comp.spv", tensors); } void @@ -75,7 +88,8 @@ OpMult::record() this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput); } -void OpMult::postSubmit() +void +OpMult::postSubmit() { SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called"); diff --git a/src/OpMult.hpp b/src/OpMult.hpp index c3c9274ab..fd1635fdf 100644 --- a/src/OpMult.hpp +++ b/src/OpMult.hpp @@ -10,8 +10,8 @@ #include -#include "Tensor.hpp" #include "Algorithm.hpp" +#include "Tensor.hpp" #include "OpBase.hpp" diff --git a/src/Parameter.hpp b/src/Parameter.hpp index 739c63826..4ad4170ce 100644 --- a/src/Parameter.hpp +++ b/src/Parameter.hpp @@ -28,7 +28,7 @@ class Algorithm ~Algorithm(); -private: + private: // Shared resources std::shared_ptr mDevice; diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 769348278..5c982f031 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -120,7 +120,8 @@ Sequence::eval() this->end(); } - // TODO: Explore whether moving postSubmit calls to a separate sequence function that is explicitly called by the manager + // TODO: Explore whether moving postSubmit calls to a separate sequence + // function that is explicitly called by the manager for (size_t i = 0; i < this->mOperations.size(); i++) { this->mOperations[i]->postSubmit(); } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index d34a5e104..d6c1f6d43 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -105,7 +105,9 @@ Tensor::isInit() return this->mIsInit; } -void Tensor::setData(const std::vector& data) { +void +Tensor::setData(const std::vector& data) +{ this->mData = data; } @@ -135,36 +137,43 @@ Tensor::recordCopyFrom(std::shared_ptr copyFromTensor) } // TODO: Explore if this function should be here or expose buffer -vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() { +vk::DescriptorBufferInfo +Tensor::constructDescriptorBufferInfo() +{ vk::DeviceSize bufferSize = this->memorySize(); - return vk::DescriptorBufferInfo( - *this->mBuffer, - 0, // offset - bufferSize - ); + return vk::DescriptorBufferInfo(*this->mBuffer, + 0, // offset + bufferSize); } -void Tensor::copyDataFromHostBuffer() { +void +Tensor::copyDataFromHostBuffer() +{ SPDLOG_DEBUG("Kompute Tensor copying data from host buffer"); if (this->mTensorType != TensorTypes::eStaging) { - spdlog::warn("Copying tensor data manually to DEVICE buffer instead of using record GPU command"); + spdlog::warn("Copying tensor data manually to DEVICE buffer instead of " + "using record GPU command"); } vk::DeviceSize bufferSize = this->memorySize(); - void* mapped = this->mDevice->mapMemory(*this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); + void* mapped = this->mDevice->mapMemory( + *this->mMemory, 0, bufferSize, vk::MemoryMapFlags()); vk::MappedMemoryRange mappedMemoryRange(*this->mMemory, 0, bufferSize); this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange); memcpy(this->mData.data(), mapped, bufferSize); this->mDevice->unmapMemory(*this->mMemory); } -void Tensor::copyDataToHostBuffer() { +void +Tensor::copyDataToHostBuffer() +{ SPDLOG_DEBUG("Kompute Tensor copying data to buffer"); if (this->mTensorType != TensorTypes::eStaging) { - spdlog::warn("Copying tensor data manually to DEVICE buffer instead of using record GPU command"); + spdlog::warn("Copying tensor data manually to DEVICE buffer instead of " + "using record GPU command"); } vk::DeviceSize bufferSize = this->memorySize(); diff --git a/src/Tensor.hpp b/src/Tensor.hpp index 9880a808c..d7e830886 100644 --- a/src/Tensor.hpp +++ b/src/Tensor.hpp @@ -52,7 +52,7 @@ class Tensor // Record functions void recordCopyFrom(std::shared_ptr copyFromTensor); // TODO: Add memory buffer barrier capabilities - //void recordBufferMemoryBarrier(); + // void recordBufferMemoryBarrier(); // Util functions vk::DescriptorBufferInfo constructDescriptorBufferInfo(); diff --git a/src/main.cpp b/src/main.cpp index 08bf9a698..60c09a02c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -637,7 +637,7 @@ main() // TODO: Add capabilities for just output tensor types spdlog::info("Creating output tensor"); std::shared_ptr tensorOutput{ new kp::Tensor( - { 0.0, 0.0, 0.0 }) }; + { 0.0, 0.0, 0.0 }) }; mgr.evalOp({ tensorOutput }); spdlog::info("OpCreateTensor success for tensors");