#include #include "kompute/Algorithm.hpp" namespace kp { Algorithm::Algorithm( std::shared_ptr device, const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup, const Constants& specializationConstants, const Constants& pushConstants) { KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; this->setWorkgroup(workgroup); this->mPushConstants = pushConstants; this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants); } Algorithm::~Algorithm() { KP_LOG_DEBUG("Kompute Algorithm Destructor started"); this->freeMemoryDestroyGPUResources(); } void Algorithm::rebuild( const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup, const Constants& specializationConstants, const Constants& pushConstants) { KP_LOG_DEBUG("Kompute Algorithm rebuild started"); // Descriptor pool is created first so if available then destroy all before rebuild if (this->mFreeDescriptorPool) { this->freeMemoryDestroyGPUResources(); } this->createParameters(tensors); this->createShaderModule(); this->createPipeline(); } void Algorithm::freeMemoryDestroyGPUResources() { if (!this->mDevice) { KP_LOG_WARN( "Kompute Algorithm destroy function reached with null Device pointer"); return; } if (this->mFreePipeline) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline"); if (!this->mPipeline) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline but it is null"); } this->mDevice->destroy( *this->mPipeline, (vk::Optional)nullptr); } if (this->mFreePipelineCache) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); if (!this->mPipelineCache) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline cache but it is null"); } this->mDevice->destroy( *this->mPipelineCache, (vk::Optional)nullptr); } if (this->mFreePipelineLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "pipeline layout but it is null"); } this->mDevice->destroy( *this->mPipelineLayout, (vk::Optional)nullptr); } if (this->mFreeShaderModule) { KP_LOG_DEBUG("Kompute Algorithm Destroying shader module"); if (!this->mShaderModule) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader " "module but it is null"); } this->mDevice->destroy( *this->mShaderModule, (vk::Optional)nullptr); } if (this->mFreeDescriptorSet) { KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set"); if (!this->mDescriptorSet) { KP_LOG_WARN( "Kompute Algorithm Error requested to free descriptor set"); } this->mDevice->freeDescriptorSets( *this->mDescriptorPool, 1, this->mDescriptorSet.get()); } if (this->mFreeDescriptorSetLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout"); if (!this->mDescriptorSetLayout) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "descriptor set layout but it is null"); } this->mDevice->destroy( *this->mDescriptorSetLayout, (vk::Optional)nullptr); } if (this->mFreeDescriptorPool) { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool"); if (!this->mDescriptorPool) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " "descriptor pool but it is null"); } this->mDevice->destroy( *this->mDescriptorPool, (vk::Optional)nullptr); } } void Algorithm::createParameters(const std::vector>& tensorParams) { KP_LOG_DEBUG("Kompute Algorithm createParameters started"); std::vector descriptorPoolSizes = { vk::DescriptorPoolSize( vk::DescriptorType::eStorageBuffer, static_cast(tensorParams.size()) // Descriptor count ) }; vk::DescriptorPoolCreateInfo descriptorPoolInfo( vk::DescriptorPoolCreateFlags(), 1, // Max sets static_cast(descriptorPoolSizes.size()), descriptorPoolSizes.data()); KP_LOG_DEBUG("Kompute Algorithm creating descriptor pool"); this->mDescriptorPool = std::make_shared(); this->mDevice->createDescriptorPool( &descriptorPoolInfo, nullptr, this->mDescriptorPool.get()); this->mFreeDescriptorPool = true; std::vector descriptorSetBindings; for (size_t i = 0; i < tensorParams.size(); i++) { descriptorSetBindings.push_back( vk::DescriptorSetLayoutBinding(i, // Binding index vk::DescriptorType::eStorageBuffer, 1, // Descriptor count vk::ShaderStageFlagBits::eCompute)); } // This is the component that is fed into the pipeline vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo( vk::DescriptorSetLayoutCreateFlags(), static_cast(descriptorSetBindings.size()), descriptorSetBindings.data()); KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout"); this->mDescriptorSetLayout = std::make_shared(); this->mDevice->createDescriptorSetLayout( &descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get()); this->mFreeDescriptorSetLayout = true; vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo( *this->mDescriptorPool, 1, // Descriptor set layout count this->mDescriptorSetLayout.get()); KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets"); this->mDescriptorSet = std::make_shared(); this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); this->mFreeDescriptorSet = true; KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets"); for (size_t i = 0; i < tensorParams.size(); i++) { std::vector computeWriteDescriptorSets; vk::DescriptorBufferInfo descriptorBufferInfo = tensorParams[i]->constructDescriptorBufferInfo(); computeWriteDescriptorSets.push_back( vk::WriteDescriptorSet(*this->mDescriptorSet, i, // Destination binding 0, // Destination array element 1, // Descriptor count vk::DescriptorType::eStorageBuffer, nullptr, // Descriptor image info &descriptorBufferInfo)); this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); } KP_LOG_DEBUG("Kompue Algorithm successfully run init"); } void Algorithm::createShaderModule() { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); vk::ShaderModuleCreateInfo shaderModuleInfo( vk::ShaderModuleCreateFlags(), sizeof(uint32_t) * this->mSpirv.size(), this->mSpirv.data()); KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}", this->mSpirv.size()); this->mFreeShaderModule = true; this->mShaderModule = std::make_shared(); this->mDevice->createShaderModule( &shaderModuleInfo, nullptr, this->mShaderModule.get()); this->mFreeShaderModule = true; KP_LOG_DEBUG("Kompute Algorithm create shader module success"); } void Algorithm::createPipeline() { KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline"); vk::PipelineLayoutCreateInfo pipelineLayoutInfo( vk::PipelineLayoutCreateFlags(), 1, // Set layout count this->mDescriptorSetLayout.get()); this->mPipelineLayout = std::make_shared(); this->mDevice->createPipelineLayout( &pipelineLayoutInfo, nullptr, this->mPipelineLayout.get()); this->mFreePipelineLayout = true; std::vector specializationEntries; for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) { vk::SpecializationMapEntry specializationEntry( static_cast(i), static_cast(sizeof(float) * i), sizeof(float)); specializationEntries.push_back(specializationEntry); } // This passes ownership of the memory so we remove ownership from // specialization container by using "transferDataOwnership" vk::SpecializationInfo specializationInfo( static_cast(specializationEntries.size()), specializationEntries.data(), sizeof(float) * this->mSpecializationConstants.size(), this->mSpecializationConstants.data()); vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, *this->mShaderModule, "main", &specializationInfo); vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, vk::Pipeline(), 0); vk::PipelineCacheCreateInfo pipelineCacheInfo = vk::PipelineCacheCreateInfo(); this->mPipelineCache = std::make_shared(); this->mDevice->createPipelineCache( &pipelineCacheInfo, nullptr, this->mPipelineCache.get()); this->mFreePipelineCache = true; #ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE vk::ResultValue pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result)); } #else vk::Pipeline pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); this->mFreePipeline = true; #endif this->mFreePipeline = true; this->mPipeline = std::make_shared(pipelineResult); } void Algorithm::recordDispatch(std::shared_ptr commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm calling record dispatch"); commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline); commandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, *this->mPipelineLayout, 0, // First set *this->mDescriptorSet, nullptr // Dispatcher ); commandBuffer->dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]); } void Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors if (workgroup[0] > 0) { // If at least the x value is provided we use mainly the parameters // provided this->mWorkgroup = { workgroup[0], workgroup[1] > 0 ? workgroup[1] : 1, workgroup[2] > 0 ? workgroup[2] : 1 }; } else { this->mWorkgroup = { minSize, 1, 1 }; } KP_LOG_INFO("Kompute OpAlgoCreate dispatch size X: {}, Y: {}, Z: {}", this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]); } }