diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index cfae65643..c58c5a228 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -4,23 +4,25 @@ namespace kp { -Algorithm::Algorithm( - std::shared_ptr device, - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants) +Algorithm::Algorithm(std::shared_ptr device, + const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants) { KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; if (tensors.size() && spirv.size()) { - KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and spirv size: {}", tensors.size(), spirv.size()); + KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and " + "spirv size: {}", + tensors.size(), + spirv.size()); this->rebuild(tensors, spirv, workgroup, specializationConstants); - } - else { - KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or spirv so not rebuilding vulkan components"); + } else { + KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " + "spirv so not rebuilding vulkan components"); } } @@ -32,20 +34,21 @@ Algorithm::~Algorithm() } void -Algorithm::rebuild( - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants) +Algorithm::rebuild(const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants) { KP_LOG_DEBUG("Kompute Algorithm rebuild started"); this->mTensors = tensors; this->mSpirv = spirv; this->mSpecializationConstants = specializationConstants; - this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1); + this->setWorkgroup(workgroup, + this->mTensors.size() ? this->mTensors[0]->size() : 1); - // Descriptor pool is created first so if available then destroy all before rebuild + // Descriptor pool is created first so if available then destroy all before + // rebuild if (this->isInit()) { this->destroy(); } @@ -56,22 +59,20 @@ Algorithm::rebuild( } bool -Algorithm::isInit() { - return this->mPipeline && - this->mPipelineCache && - this->mPipelineLayout && - this->mDescriptorPool && - this->mDescriptorSet && - this->mDescriptorSetLayout && - this->mShaderModule; +Algorithm::isInit() +{ + return this->mPipeline && this->mPipelineCache && this->mPipelineLayout && + this->mDescriptorPool && this->mDescriptorSet && + this->mDescriptorSetLayout && this->mShaderModule; } void -Algorithm::destroy() { +Algorithm::destroy() +{ if (!this->mDevice) { - KP_LOG_WARN( - "Kompute Algorithm destroy function reached with null Device pointer"); + KP_LOG_WARN("Kompute Algorithm destroy function reached with null " + "Device pointer"); return; } @@ -79,7 +80,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline"); if (!this->mPipeline) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline but it is null"); + "pipeline but it is null"); } this->mDevice->destroy( *this->mPipeline, @@ -91,7 +92,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); if (!this->mPipelineCache) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline cache but it is null"); + "pipeline cache but it is null"); } this->mDevice->destroy( *this->mPipelineCache, @@ -103,7 +104,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline layout but it is null"); + "pipeline layout but it is null"); } this->mDevice->destroy( *this->mPipelineLayout, @@ -115,7 +116,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying shader module"); if (!this->mShaderModule) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader " - "module but it is null"); + "module but it is null"); } this->mDevice->destroy( *this->mShaderModule, @@ -123,10 +124,10 @@ Algorithm::destroy() { this->mShaderModule = nullptr; } - // We don't call freeDescriptorSet as the descriptor pool is not created with - // VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at + // We don't call freeDescriptorSet as the descriptor pool is not created + // with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at // (https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VUID-vkFreeDescriptorSets-descriptorPool-00312)) - //if (this->mFreeDescriptorSet && this->mDescriptorSet) { + // if (this->mFreeDescriptorSet && this->mDescriptorSet) { // KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set"); // if (!this->mDescriptorSet) { // KP_LOG_WARN( @@ -141,7 +142,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout"); if (!this->mDescriptorSetLayout) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "descriptor set layout but it is null"); + "descriptor set layout but it is null"); } this->mDevice->destroy( *this->mDescriptorSetLayout, @@ -153,7 +154,7 @@ Algorithm::destroy() { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool"); if (!this->mDescriptorPool) { KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "descriptor pool but it is null"); + "descriptor pool but it is null"); } this->mDevice->destroy( *this->mDescriptorPool, @@ -246,10 +247,10 @@ Algorithm::createShaderModule() { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); - vk::ShaderModuleCreateInfo shaderModuleInfo( - vk::ShaderModuleCreateFlags(), - sizeof(uint32_t) * this->mSpirv.size(), - this->mSpirv.data()); + vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), + sizeof(uint32_t) * + this->mSpirv.size(), + this->mSpirv.data()); KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}", this->mSpirv.size()); @@ -281,14 +282,14 @@ Algorithm::createPipeline() for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) { vk::SpecializationMapEntry specializationEntry( - static_cast(i), - static_cast(sizeof(float) * i), - sizeof(float)); + static_cast(i), + static_cast(sizeof(float) * i), + sizeof(float)); specializationEntries.push_back(specializationEntry); } - // This passes ownership of the memory so we remove ownership from + // This passes ownership of the memory so we remove ownership from // specialization container by using "transferDataOwnership" vk::SpecializationInfo specializationInfo( static_cast(specializationEntries.size()), @@ -338,7 +339,8 @@ Algorithm::createPipeline() // TODO: Update to consistent // this->mPipeline = std::make_shared(); // this->mDevice->createComputePipelines( - // *this->mPipelineCache, 1, &pipelineInfo, nullptr, this->mPipeline.get()); + // *this->mPipelineCache, 1, &pipelineInfo, nullptr, + // this->mPipeline.get()); KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success"); } @@ -349,29 +351,31 @@ Algorithm::bindCore(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute Algorithm binding pipeline"); commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute, - *this->mPipeline); + *this->mPipeline); KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets"); commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, - *this->mPipelineLayout, - 0, // First set - *this->mDescriptorSet, - nullptr // Dispatcher + *this->mPipelineLayout, + 0, // First set + *this->mDescriptorSet, + nullptr // Dispatcher ); } void -Algorithm::bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants) +Algorithm::bindPush(const vk::CommandBuffer& commandBuffer, + const Constants& pushConstants) { if (pushConstants.size()) { - KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", pushConstants.size()); + KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", + pushConstants.size()); commandBuffer.pushConstants(*this->mPipelineLayout, - vk::ShaderStageFlagBits::eCompute, - 0, - pushConstants.size() * sizeof(float), - pushConstants.data()); + vk::ShaderStageFlagBits::eCompute, + 0, + pushConstants.size() * sizeof(float), + pushConstants.data()); } } @@ -380,11 +384,13 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm recording dispatch"); - commandBuffer.dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]); + commandBuffer.dispatch( + this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]); } void -Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { +Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) +{ KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size"); @@ -393,11 +399,9 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { if (workgroup[0] > 0) { // If at least the x value is provided we use mainly the parameters // provided - this->mWorkgroup = { - workgroup[0], - workgroup[1] > 0 ? workgroup[1] : 1, - workgroup[2] > 0 ? workgroup[2] : 1 - }; + this->mWorkgroup = { workgroup[0], + workgroup[1] > 0 ? workgroup[1] : 1, + workgroup[2] > 0 ? workgroup[2] : 1 }; } else { this->mWorkgroup = { minSize, 1, 1 }; } @@ -409,17 +413,20 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { } const Workgroup& -Algorithm::getWorkgroup() { +Algorithm::getWorkgroup() +{ return this->mWorkgroup; } const Constants& -Algorithm::getSpecializationConstants() { +Algorithm::getSpecializationConstants() +{ return this->mSpecializationConstants; } const std::vector>& -Algorithm::getTensors() { +Algorithm::getTensors() +{ return this->mTensors; } diff --git a/src/Manager.cpp b/src/Manager.cpp index bb109aace..38f67de0d 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -55,7 +55,8 @@ Manager::~Manager() } void -Manager::destroy() { +Manager::destroy() +{ KP_LOG_DEBUG("Kompute Manager destroy() started"); @@ -78,7 +79,8 @@ Manager::destroy() { if (this->mManageResources && this->mManagedAlgorithms.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); - for (const std::weak_ptr& weakAlgorithm : this->mManagedAlgorithms) { + for (const std::weak_ptr& weakAlgorithm : + this->mManagedAlgorithms) { if (std::shared_ptr algorithm = weakAlgorithm.lock()) { algorithm->destroy(); } @@ -214,31 +216,31 @@ Manager::createInstance() } void -Manager::clear() { +Manager::clear() +{ if (this->mManageResources) { this->mManagedTensors.erase( - std::remove_if( - begin(this->mManagedTensors), - end(this->mManagedTensors), - [](std::weak_ptr t) {return t.expired();}), - end(this->mManagedTensors)); + std::remove_if(begin(this->mManagedTensors), + end(this->mManagedTensors), + [](std::weak_ptr t) { return t.expired(); }), + end(this->mManagedTensors)); this->mManagedAlgorithms.erase( - std::remove_if( - begin(this->mManagedAlgorithms), - end(this->mManagedAlgorithms), - [](std::weak_ptr t) {return t.expired();}), - end(this->mManagedAlgorithms)); + std::remove_if( + begin(this->mManagedAlgorithms), + end(this->mManagedAlgorithms), + [](std::weak_ptr t) { return t.expired(); }), + end(this->mManagedAlgorithms)); this->mManagedSequences.erase( - std::remove_if( - begin(this->mManagedSequences), - end(this->mManagedSequences), - [](std::weak_ptr t) {return t.expired();}), - end(this->mManagedSequences)); + std::remove_if(begin(this->mManagedSequences), + end(this->mManagedSequences), + [](std::weak_ptr t) { return t.expired(); }), + end(this->mManagedSequences)); } } void -Manager::createDevice(const std::vector& familyQueueIndices, uint32_t physicalDeviceIndex) +Manager::createDevice(const std::vector& familyQueueIndices, + uint32_t physicalDeviceIndex) { KP_LOG_DEBUG("Kompute Manager creating Device"); @@ -256,8 +258,7 @@ Manager::createDevice(const std::vector& familyQueueIndices, uint32_t std::vector physicalDevices = this->mInstance->enumeratePhysicalDevices(); - vk::PhysicalDevice physicalDevice = - physicalDevices[physicalDeviceIndex]; + vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex]; this->mPhysicalDevice = std::make_shared(physicalDevice); @@ -342,16 +343,14 @@ Manager::createDevice(const std::vector& familyQueueIndices, uint32_t } std::shared_ptr -Manager::tensor( - const std::vector& data, - Tensor::TensorTypes tensorType) +Manager::tensor(const std::vector& data, Tensor::TensorTypes tensorType) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - std::shared_ptr tensor{ - new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) }; + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, this->mDevice, data, tensorType) }; - if (this->mManageResources) { + if (this->mManageResources) { this->mManagedTensors.push_back(tensor); } @@ -359,23 +358,18 @@ Manager::tensor( } std::shared_ptr -Manager::algorithm( - const std::vector>& tensors, - const std::vector& spirv, - const Workgroup& workgroup, - const Constants& specializationConstants) { +Manager::algorithm(const std::vector>& tensors, + const std::vector& spirv, + const Workgroup& workgroup, + const Constants& specializationConstants) +{ KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - std::shared_ptr algorithm{ - new kp::Algorithm( - this->mDevice, - tensors, - spirv, - workgroup, - specializationConstants)}; + std::shared_ptr algorithm{ new kp::Algorithm( + this->mDevice, tensors, spirv, workgroup, specializationConstants) }; - if (this->mManageResources) { + if (this->mManageResources) { this->mManagedAlgorithms.push_back(algorithm); } @@ -385,16 +379,15 @@ Manager::algorithm( std::shared_ptr Manager::sequence(uint32_t queueIndex) { - KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", - queueIndex); + KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex); - std::shared_ptr sq{ - new kp::Sequence(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]) }; + std::shared_ptr sq{ new kp::Sequence( + this->mPhysicalDevice, + this->mDevice, + this->mComputeQueues[queueIndex], + this->mComputeQueueFamilyIndices[queueIndex]) }; - if (this->mManageResources) { + if (this->mManageResources) { this->mManagedSequences.push_back(sq); } diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index b8e49f144..4a30751fb 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -5,7 +5,7 @@ namespace kp { OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr& algorithm, - const kp::Constants& pushConstants) + const kp::Constants& pushConstants) { KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor"); @@ -24,7 +24,8 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute OpAlgoDispatch record called"); // Barrier to ensure the data is finished writing to buffer memory - for (const std::shared_ptr& tensor : this->mAlgorithm->getTensors()) { + for (const std::shared_ptr& tensor : + this->mAlgorithm->getTensors()) { tensor->recordBufferMemoryBarrier( commandBuffer, vk::AccessFlagBits::eHostWrite, diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 5fdd7b74c..85cefde77 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -30,8 +30,8 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer) for (size_t i = 0; i < this->mTensors.size(); i++) { if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { - this->mTensors[i]->recordCopyFromStagingToDevice( - commandBuffer, false); + this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer, + false); } } } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 6add3fa20..092490d15 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -30,8 +30,8 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer) for (size_t i = 0; i < this->mTensors.size(); i++) { if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { - this->mTensors[i]->recordCopyFromDeviceToStaging( - commandBuffer, true); + this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer, + true); } } } diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 077681fac..68ff082ce 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -37,7 +37,8 @@ Sequence::begin() } if (this->isRunning()) { - throw std::runtime_error("Kompute Sequence begin called when sequence still running"); + throw std::runtime_error( + "Kompute Sequence begin called when sequence still running"); } KP_LOG_INFO("Kompute Sequence command now started recording"); @@ -53,8 +54,7 @@ Sequence::end() if (!this->isRecording()) { KP_LOG_WARN("Kompute Sequence end called when not recording"); return; - } - else { + } else { KP_LOG_INFO("Kompute Sequence command recording END"); this->mCommandBuffer->end(); this->mRecording = false; @@ -62,7 +62,8 @@ Sequence::end() } void -Sequence::clear() { +Sequence::clear() +{ KP_LOG_DEBUG("Kompute Sequence calling clear"); this->end(); } @@ -76,7 +77,8 @@ Sequence::eval() } std::shared_ptr -Sequence::eval(std::shared_ptr op) { +Sequence::eval(std::shared_ptr op) +{ this->clear(); return this->record(op)->eval(); } @@ -89,8 +91,9 @@ Sequence::evalAsync() } if (this->mIsRunning) { - throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was " - "called without successful wait"); + throw std::runtime_error( + "Kompute Sequence evalAsync called when an eval async was " + "called without successful wait"); } this->mIsRunning = true; @@ -137,7 +140,8 @@ Sequence::evalAwait(uint64_t waitFor) this->mIsRunning = false; if (result == vk::Result::eTimeout) { - KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", waitFor); + KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", + waitFor); return shared_from_this(); } @@ -161,11 +165,10 @@ Sequence::isRecording() } bool -Sequence::isInit() { - return this->mDevice && - this->mCommandPool && - this->mCommandBuffer && - this->mComputeQueue; +Sequence::isInit() +{ + return this->mDevice && this->mCommandPool && this->mCommandBuffer && + this->mComputeQueue; } void @@ -175,16 +178,15 @@ Sequence::destroy() if (!this->mDevice) { KP_LOG_WARN("Kompute Sequence destroy called " - "with null Device pointer"); + "with null Device pointer"); return; } if (this->mFreeCommandBuffer) { KP_LOG_INFO("Freeing CommandBuffer"); if (!this->mCommandBuffer) { - KP_LOG_WARN( - "Kompute Sequence destroy called with null " - "CommandPool pointer"); + KP_LOG_WARN("Kompute Sequence destroy called with null " + "CommandPool pointer"); return; } this->mDevice->freeCommandBuffers( @@ -199,9 +201,8 @@ Sequence::destroy() if (this->mFreeCommandPool) { KP_LOG_INFO("Destroying CommandPool"); if (this->mCommandPool == nullptr) { - KP_LOG_WARN( - "Kompute Sequence destroy called with null " - "CommandPool pointer"); + KP_LOG_WARN("Kompute Sequence destroy called with null " + "CommandPool pointer"); return; } this->mDevice->destroy( @@ -228,7 +229,6 @@ Sequence::destroy() if (this->mComputeQueue) { this->mComputeQueue = nullptr; } - } std::shared_ptr diff --git a/src/Shader.cpp b/src/Shader.cpp index cdcd66e74..428b5a667 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -5,11 +5,13 @@ namespace kp { std::vector -Shader::compile_sources(const std::vector& sources, - const std::vector& files, - const std::string& entryPoint, - std::vector> definitions, - const TBuiltInResource& resources) { +Shader::compile_sources( + const std::vector& sources, + const std::vector& files, + const std::string& entryPoint, + std::vector> definitions, + const TBuiltInResource& resources) +{ // Initialize glslang library. glslang::InitializeProcess(); @@ -18,27 +20,32 @@ Shader::compile_sources(const std::vector& sources, const EShLanguage language = EShLangCompute; glslang::TShader shader(language); - std::vector filesCStr(files.size()), sourcesCStr(sources.size()); - for (size_t i = 0; i < sources.size(); i++) sourcesCStr[i] = sources[i].c_str(); + std::vector filesCStr(files.size()), + sourcesCStr(sources.size()); + for (size_t i = 0; i < sources.size(); i++) + sourcesCStr[i] = sources[i].c_str(); if (files.size() > 1) { assert(files.size() == sources.size()); - for (size_t i = 0; i < files.size(); i++) filesCStr[i] = files[i].c_str(); - shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size()); - } - else { - filesCStr = {""}; - shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size()); + for (size_t i = 0; i < files.size(); i++) + filesCStr[i] = files[i].c_str(); + shader.setStringsWithLengthsAndNames( + sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size()); + } else { + filesCStr = { "" }; + shader.setStringsWithLengthsAndNames( + sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size()); } shader.setEntryPoint(entryPoint.c_str()); shader.setSourceEntryPoint(entryPoint.c_str()); std::string info_log = ""; - const EShMessages messages = static_cast(EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules); - if (!shader.parse(&resources, 100, false, messages)) - { - info_log = std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog()); + const EShMessages messages = static_cast( + EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules); + if (!shader.parse(&resources, 100, false, messages)) { + info_log = std::string(shader.getInfoLog()) + "\n" + + std::string(shader.getInfoDebugLog()); KP_LOG_ERROR("Kompute Shader Error: {}", info_log); throw std::runtime_error(info_log); } @@ -47,24 +54,23 @@ Shader::compile_sources(const std::vector& sources, glslang::TProgram program; program.addShader(&shader); // Link program. - if (!program.link(messages)) - { - info_log = std::string(program.getInfoLog()) + "\n" + std::string(program.getInfoDebugLog()); + if (!program.link(messages)) { + info_log = std::string(program.getInfoLog()) + "\n" + + std::string(program.getInfoDebugLog()); KP_LOG_ERROR("Kompute Shader Error: {}", info_log); throw std::runtime_error(info_log); } // Save any info log that was generated. - if (shader.getInfoLog()) - { - info_log += std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog()) + "\n"; + if (shader.getInfoLog()) { + info_log += std::string(shader.getInfoLog()) + "\n" + + std::string(shader.getInfoDebugLog()) + "\n"; KP_LOG_INFO("Kompute Shader Information: {}", info_log); } - glslang::TIntermediate *intermediate = program.getIntermediate(language); + glslang::TIntermediate* intermediate = program.getIntermediate(language); // Translate to SPIRV. - if (!intermediate) - { + if (!intermediate) { info_log += "Failed to get shared intermediate code.\n"; KP_LOG_ERROR("Kompute Shader Error: {}", info_log); throw std::runtime_error(info_log); @@ -74,8 +80,7 @@ Shader::compile_sources(const std::vector& sources, std::vector spirv; glslang::GlslangToSpv(*intermediate, spirv, &logger); - if (shader.getInfoLog()) - { + if (shader.getInfoLog()) { info_log += logger.getAllMessages() + "\n"; KP_LOG_DEBUG("Kompute Shader all result messages: {}", info_log); } @@ -87,11 +92,17 @@ Shader::compile_sources(const std::vector& sources, } std::vector -Shader::compile_source(const std::string& source, - const std::string& entryPoint, - std::vector> definitions, - const TBuiltInResource& resource) { - return compile_sources({source}, std::vector({}), entryPoint, definitions, resource); +Shader::compile_source( + const std::string& source, + const std::string& entryPoint, + std::vector> definitions, + const TBuiltInResource& resource) +{ + return compile_sources({ source }, + std::vector({}), + entryPoint, + definitions, + resource); } } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index dd645708e..f584c07bd 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -4,9 +4,9 @@ namespace kp { Tensor::Tensor(std::shared_ptr physicalDevice, - std::shared_ptr device, - const std::vector& data, - const TensorTypes& tensorType) + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", data.size(), @@ -29,17 +29,16 @@ Tensor::~Tensor() } void -Tensor::rebuild(const std::vector& data, - TensorTypes tensorType) +Tensor::rebuild(const std::vector& data, TensorTypes tensorType) { - KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", - data.size()); + KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size()); this->mData = data; this->mTensorType = tensorType; if (this->mPrimaryBuffer || this->mPrimaryMemory) { - KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild"); + KP_LOG_DEBUG( + "Kompute Tensor destroying existing resources before rebuild"); this->destroy(); } @@ -77,10 +76,9 @@ Tensor::tensorType() } bool -Tensor::isInit() { - return this->mDevice && - this->mPrimaryBuffer && - this->mPrimaryMemory; +Tensor::isInit() +{ + return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory; } void @@ -105,17 +103,16 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize); this->recordCopyBuffer(commandBuffer, - copyFromTensor->mPrimaryBuffer, - this->mPrimaryBuffer, - bufferSize, - copyRegion, - createBarrier); + copyFromTensor->mPrimaryBuffer, + this->mPrimaryBuffer, + bufferSize, + copyRegion, + createBarrier); } void -Tensor::recordCopyFromStagingToDevice( - const vk::CommandBuffer& commandBuffer, - bool createBarrier) +Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, + bool createBarrier) { vk::DeviceSize bufferSize(this->memorySize()); vk::BufferCopy copyRegion(0, 0, bufferSize); @@ -123,17 +120,16 @@ Tensor::recordCopyFromStagingToDevice( KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize); this->recordCopyBuffer(commandBuffer, - this->mStagingBuffer, - this->mPrimaryBuffer, - bufferSize, - copyRegion, - createBarrier); + this->mStagingBuffer, + this->mPrimaryBuffer, + bufferSize, + copyRegion, + createBarrier); } void -Tensor::recordCopyFromDeviceToStaging( - const vk::CommandBuffer& commandBuffer, - bool createBarrier) +Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer, + bool createBarrier) { vk::DeviceSize bufferSize(this->memorySize()); vk::BufferCopy copyRegion(0, 0, bufferSize); @@ -141,20 +137,20 @@ Tensor::recordCopyFromDeviceToStaging( KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize); this->recordCopyBuffer(commandBuffer, - this->mPrimaryBuffer, - this->mStagingBuffer, - bufferSize, - copyRegion, - createBarrier); + this->mPrimaryBuffer, + this->mStagingBuffer, + bufferSize, + copyRegion, + createBarrier); } void Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, - vk::DeviceSize bufferSize, - vk::BufferCopy copyRegion, - bool createBarrier) + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier) { commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion); @@ -170,12 +166,11 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordBufferMemoryBarrier( - const vk::CommandBuffer& commandBuffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask) +Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask) { KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier"); @@ -190,11 +185,11 @@ Tensor::recordBufferMemoryBarrier( bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; commandBuffer.pipelineBarrier(srcStageMask, - dstStageMask, - vk::DependencyFlags(), - nullptr, - bufferMemoryBarrier, - nullptr); + dstStageMask, + vk::DependencyFlags(), + nullptr, + bufferMemoryBarrier, + nullptr); } vk::DescriptorBufferInfo @@ -449,7 +444,7 @@ Tensor::destroy() if (this->mFreePrimaryBuffer) { if (!this->mPrimaryBuffer) { KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer " - "but got null buffer"); + "but got null buffer"); } else { KP_LOG_DEBUG("Kompose Tensor destroying primary buffer"); this->mDevice->destroy( @@ -463,7 +458,7 @@ Tensor::destroy() if (this->mFreeStagingBuffer) { if (!this->mStagingBuffer) { KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer " - "but got null buffer"); + "but got null buffer"); } else { KP_LOG_DEBUG("Kompose Tensor destroying staging buffer"); this->mDevice->destroy( @@ -477,7 +472,7 @@ Tensor::destroy() if (this->mFreePrimaryMemory) { if (!this->mPrimaryMemory) { KP_LOG_WARN("Kompose Tensor expected to free primary memory but " - "got null memory"); + "got null memory"); } else { KP_LOG_DEBUG("Kompose Tensor freeing primary memory"); this->mDevice->freeMemory( @@ -491,7 +486,7 @@ Tensor::destroy() if (this->mFreeStagingMemory) { if (!this->mStagingMemory) { KP_LOG_WARN("Kompose Tensor expected to free staging memory but " - "got null memory"); + "got null memory"); } else { KP_LOG_DEBUG("Kompose Tensor freeing staging memory"); this->mDevice->freeMemory( diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index b80f3946c..e5fd1287e 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -12,8 +12,7 @@ namespace kp { */ class Algorithm { -public: - + public: /** * Default constructor for Algorithm * @@ -21,12 +20,11 @@ public: * @param commandBuffer The vulkan command buffer to bind the pipeline and * shaders */ - Algorithm( - std::shared_ptr device, - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + Algorithm(std::shared_ptr device, + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); /** * Initialiser for the shader data provided to the algorithm as well as @@ -34,14 +32,13 @@ public: * * @param shaderFileData The bytes in spir-v format of the shader * @tensorParams The Tensors to be used in the Algorithm / shader for - * @specalizationInstalces The specialization parameters to pass to the function - * processing + * @specalizationInstalces The specialization parameters to pass to the + * function processing */ - void rebuild( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + void rebuild(const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); /** * Destructor for Algorithm which is responsible for freeing and desroying @@ -61,7 +58,8 @@ public: void bindCore(const vk::CommandBuffer& commandBuffer); - void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants); + void bindPush(const vk::CommandBuffer& commandBuffer, + const Constants& pushConstants); bool isInit(); @@ -73,7 +71,7 @@ public: void destroy(); -private: + private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; std::vector> mTensors; diff --git a/src/include/kompute/Core.hpp b/src/include/kompute/Core.hpp index 6da52953f..b50bf081d 100644 --- a/src/include/kompute/Core.hpp +++ b/src/include/kompute/Core.hpp @@ -60,12 +60,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_DEBUG(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_DEBUG(...) \ - ((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_DEBUG(...) \ + ((void)__android_log_print( \ + ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_DEBUG(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 1 @@ -73,12 +80,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_INFO(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_INFO(...) \ - ((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_INFO(...) \ + ((void)__android_log_print( \ + ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_INFO(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 2 @@ -86,12 +100,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_WARN(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_WARN(...) \ - ((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_WARN(...) \ + ((void)__android_log_print( \ + ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_WARN(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 3 @@ -99,12 +120,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error; #define KP_LOG_ERROR(...) #else #if defined(VK_USE_PLATFORM_ANDROID_KHR) -#define KP_LOG_ERROR(...) \ - ((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) +#define KP_LOG_ERROR(...) \ + ((void)__android_log_print( \ + ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__))) #elif defined(KOMPUTE_BUILD_PYTHON) #define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__)) #else -#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__)) +#define KP_LOG_ERROR(...) \ + fmt::print("[{} {}] [debug] [{}:{}] {}\n", \ + __DATE__, \ + __TIME__, \ + __FILE__, \ + __LINE__, \ + fmt::format(__VA_ARGS__)) #endif // VK_USE_PLATFORM_ANDROID_KHR #endif // SPDLOG_ACTIVE_LEVEL > 4 #endif // KOMPUTE_SPDLOG_ENABLED diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index e651cf2bb..61212abf2 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -84,10 +84,10 @@ class Manager Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice); std::shared_ptr algorithm( - const std::vector>& tensors = {}, - const std::vector& spirv = {}, - const Workgroup& workgroup = {}, - const Constants& specializationConstants = {}); + const std::vector>& tensors = {}, + const std::vector& spirv = {}, + const Workgroup& workgroup = {}, + const Constants& specializationConstants = {}); void destroy(); void clear(); @@ -119,7 +119,8 @@ class Manager // Create functions void createInstance(); - void createDevice(const std::vector& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0); + void createDevice(const std::vector& familyQueueIndices = {}, + uint32_t hysicalDeviceIndex = 0); }; } // End namespace kp diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index fa3b399e8..29c6a0c3b 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -9,7 +9,7 @@ namespace kp { /** * Container of operations that can be sent to GPU as batch */ -class Sequence: public std::enable_shared_from_this +class Sequence : public std::enable_shared_from_this { public: /** @@ -46,8 +46,9 @@ class Sequence: public std::enable_shared_from_this * which allows for extensible configurations on initialisation. */ template - std::shared_ptr - record(std::vector> tensors, TArgs&&... params) + std::shared_ptr record( + std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -56,14 +57,13 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->record(op); } template - std::shared_ptr - record(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr record(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -72,8 +72,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->record(op); } @@ -96,8 +96,8 @@ class Sequence: public std::enable_shared_from_this */ // TODO: Aim to have only a single function with tensors/algorithm template - std::shared_ptr - eval(std::vector> tensors, TArgs&&... params) + std::shared_ptr eval(std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -106,16 +106,16 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; - // TODO: Aim to be able to handle errors when returning without throw except + // TODO: Aim to be able to handle errors when returning without throw + // except return this->eval(op); } // Needded as otherise can't use initialiser list template - std::shared_ptr - eval(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr eval(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -124,8 +124,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->eval(op); } @@ -147,8 +147,9 @@ class Sequence: public std::enable_shared_from_this * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr - evalAsync(std::vector> tensors, TArgs&&... params) + std::shared_ptr evalAsync( + std::vector> tensors, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -157,15 +158,14 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->evalAsync(op); } // Needed as otherwise it's not possible to use initializer lists template - std::shared_ptr - evalAsync(std::shared_ptr algorithm, TArgs&&... params) + std::shared_ptr evalAsync(std::shared_ptr algorithm, + TArgs&&... params) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -174,8 +174,8 @@ class Sequence: public std::enable_shared_from_this "OpBase derived classes"); KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - std::shared_ptr op{ - new T(algorithm, std::forward(params)...) }; + std::shared_ptr op{ new T(algorithm, + std::forward(params)...) }; return this->evalAsync(op); } @@ -190,7 +190,8 @@ class Sequence: public std::enable_shared_from_this std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** - * Clear function clears all operations currently recorded and starts recording again. + * Clear function clears all operations currently recorded and starts + * recording again. */ void clear(); @@ -217,7 +218,6 @@ class Sequence: public std::enable_shared_from_this */ bool isRecording(); - bool isInit(); /** diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp index 8c9a14c83..2d0e43741 100644 --- a/src/include/kompute/Shader.hpp +++ b/src/include/kompute/Shader.hpp @@ -4,9 +4,9 @@ #include #include +#include #include #include -#include #include "kompute/Core.hpp" @@ -16,161 +16,162 @@ namespace kp { // Has been adobted by: // https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp const TBuiltInResource defaultResource = { -/* .MaxLights = */ 0, -/* .MaxClipPlanes = */ 0, -/* .MaxTextureUnits = */ 0, -/* .MaxTextureCoords = */ 0, -/* .MaxVertexAttribs = */ 64, -/* .MaxVertexUniformComponents = */ 4096, -/* .MaxVaryingFloats = */ 64, -/* .MaxVertexTextureImageUnits = */ 0, -/* .MaxCombinedTextureImageUnits = */ 0, -/* .MaxTextureImageUnits = */ 0, -/* .MaxFragmentUniformComponents = */ 0, -/* .MaxDrawBuffers = */ 0, -/* .MaxVertexUniformVectors = */ 128, -/* .MaxVaryingVectors = */ 8, -/* .MaxFragmentUniformVectors = */ 0, -/* .MaxVertexOutputVectors = */ 16, -/* .MaxFragmentInputVectors = */ 0, -/* .MinProgramTexelOffset = */ -8, -/* .MaxProgramTexelOffset = */ 7, -/* .MaxClipDistances = */ 8, -/* .MaxComputeWorkGroupCountX = */ 65535, -/* .MaxComputeWorkGroupCountY = */ 65535, -/* .MaxComputeWorkGroupCountZ = */ 65535, -/* .MaxComputeWorkGroupSizeX = */ 1024, -/* .MaxComputeWorkGroupSizeY = */ 1024, -/* .MaxComputeWorkGroupSizeZ = */ 64, -/* .MaxComputeUniformComponents = */ 1024, -/* .MaxComputeTextureImageUnits = */ 16, -/* .MaxComputeImageUniforms = */ 8, -/* .MaxComputeAtomicCounters = */ 8, -/* .MaxComputeAtomicCounterBuffers = */ 1, -/* .MaxVaryingComponents = */ 60, -/* .MaxVertexOutputComponents = */ 64, -/* .MaxGeometryInputComponents = */ 64, -/* .MaxGeometryOutputComponents = */ 128, -/* .MaxFragmentInputComponents = */ 0, -/* .MaxImageUnits = */ 0, -/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0, -/* .MaxCombinedShaderOutputResources = */ 8, -/* .MaxImageSamples = */ 0, -/* .MaxVertexImageUniforms = */ 0, -/* .MaxTessControlImageUniforms = */ 0, -/* .MaxTessEvaluationImageUniforms = */ 0, -/* .MaxGeometryImageUniforms = */ 0, -/* .MaxFragmentImageUniforms = */ 0, -/* .MaxCombinedImageUniforms = */ 0, -/* .MaxGeometryTextureImageUnits = */ 0, -/* .MaxGeometryOutputVertices = */ 256, -/* .MaxGeometryTotalOutputComponents = */ 1024, -/* .MaxGeometryUniformComponents = */ 1024, -/* .MaxGeometryVaryingComponents = */ 64, -/* .MaxTessControlInputComponents = */ 128, -/* .MaxTessControlOutputComponents = */ 128, -/* .MaxTessControlTextureImageUnits = */ 0, -/* .MaxTessControlUniformComponents = */ 1024, -/* .MaxTessControlTotalOutputComponents = */ 4096, -/* .MaxTessEvaluationInputComponents = */ 128, -/* .MaxTessEvaluationOutputComponents = */ 128, -/* .MaxTessEvaluationTextureImageUnits = */ 16, -/* .MaxTessEvaluationUniformComponents = */ 1024, -/* .MaxTessPatchComponents = */ 120, -/* .MaxPatchVertices = */ 32, -/* .MaxTessGenLevel = */ 64, -/* .MaxViewports = */ 16, -/* .MaxVertexAtomicCounters = */ 0, -/* .MaxTessControlAtomicCounters = */ 0, -/* .MaxTessEvaluationAtomicCounters = */ 0, -/* .MaxGeometryAtomicCounters = */ 0, -/* .MaxFragmentAtomicCounters = */ 0, -/* .MaxCombinedAtomicCounters = */ 8, -/* .MaxAtomicCounterBindings = */ 1, -/* .MaxVertexAtomicCounterBuffers = */ 0, -/* .MaxTessControlAtomicCounterBuffers = */ 0, -/* .MaxTessEvaluationAtomicCounterBuffers = */ 0, -/* .MaxGeometryAtomicCounterBuffers = */ 0, -/* .MaxFragmentAtomicCounterBuffers = */ 0, -/* .MaxCombinedAtomicCounterBuffers = */ 1, -/* .MaxAtomicCounterBufferSize = */ 16384, -/* .MaxTransformFeedbackBuffers = */ 4, -/* .MaxTransformFeedbackInterleavedComponents = */ 64, -/* .MaxCullDistances = */ 8, -/* .MaxCombinedClipAndCullDistances = */ 8, -/* .MaxSamples = */ 4, -/* .maxMeshOutputVerticesNV = */ 256, -/* .maxMeshOutputPrimitivesNV = */ 512, -/* .maxMeshWorkGroupSizeX_NV = */ 32, -/* .maxMeshWorkGroupSizeY_NV = */ 1, -/* .maxMeshWorkGroupSizeZ_NV = */ 1, -/* .maxTaskWorkGroupSizeX_NV = */ 32, -/* .maxTaskWorkGroupSizeY_NV = */ 1, -/* .maxTaskWorkGroupSizeZ_NV = */ 1, -/* .maxMeshViewCountNV = */ 4, -/* .maxDualSourceDrawBuffersEXT = */ 1, + /* .MaxLights = */ 0, + /* .MaxClipPlanes = */ 0, + /* .MaxTextureUnits = */ 0, + /* .MaxTextureCoords = */ 0, + /* .MaxVertexAttribs = */ 64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 0, + /* .MaxCombinedTextureImageUnits = */ 0, + /* .MaxTextureImageUnits = */ 0, + /* .MaxFragmentUniformComponents = */ 0, + /* .MaxDrawBuffers = */ 0, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 0, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 0, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 0, + /* .MaxImageUnits = */ 0, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* .MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 0, + /* .MaxCombinedImageUniforms = */ 0, + /* .MaxGeometryTextureImageUnits = */ 0, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = */ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* .MaxTessControlTextureImageUnits = */ 0, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 0, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 0, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* .maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV = */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, + /* .maxDualSourceDrawBuffersEXT = */ 1, + + /* .limits = */ + { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + } +}; -/* .limits = */ { - /* .nonInductiveForLoops = */ 1, - /* .whileLoops = */ 1, - /* .doWhileLoops = */ 1, - /* .generalUniformIndexing = */ 1, - /* .generalAttributeMatrixVectorIndexing = */ 1, - /* .generalVaryingIndexing = */ 1, - /* .generalSamplerIndexing = */ 1, - /* .generalVariableIndexing = */ 1, - /* .generalConstantMatrixVectorIndexing = */ 1, -}}; - /** Shader utily class with functions to compile and process glsl files. */ -class Shader { -public: +class Shader +{ + public: /** * Compile multiple sources with optional filenames. Currently this function * uses the glslang C++ interface which is not thread safe so this funciton * should not be called from multiple threads concurrently. If you have a - * online shader processing multithreading use-case that can't use offline + * online shader processing multithreading use-case that can't use offline * compilation please open an issue. * * @param sources A list of raw glsl shaders in string format * @param files A list of file names respective to each of the sources * @param entryPoint The function name to use as entry point * @param definitions List of pairs containing key value definitions - * @param resourcesLimit A list that contains the resource limits for the GLSL compiler + * @param resourcesLimit A list that contains the resource limits for the + * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ static std::vector compile_sources( - const std::vector& sources, - const std::vector& files = {}, - const std::string& entryPoint = "main", - std::vector> definitions = {}, - const TBuiltInResource& resources = defaultResource); + const std::vector& sources, + const std::vector& files = {}, + const std::string& entryPoint = "main", + std::vector> definitions = {}, + const TBuiltInResource& resources = defaultResource); /** - * Compile a single glslang source from string value. Currently this function - * uses the glslang C++ interface which is not thread safe so this funciton - * should not be called from multiple threads concurrently. If you have a - * online shader processing multithreading use-case that can't use offline - * compilation please open an issue. + * Compile a single glslang source from string value. Currently this + * function uses the glslang C++ interface which is not thread safe so this + * funciton should not be called from multiple threads concurrently. If you + * have a online shader processing multithreading use-case that can't use + * offline compilation please open an issue. * * @param source An individual raw glsl shader in string format * @param entryPoint The function name to use as entry point * @param definitions List of pairs containing key value definitions - * @param resourcesLimit A list that contains the resource limits for the GLSL compiler + * @param resourcesLimit A list that contains the resource limits for the + * GLSL compiler * @return The compiled SPIR-V binary in unsigned int32 format */ static std::vector compile_source( - const std::string& source, - const std::string& entryPoint = "main", - std::vector> definitions = {}, - const TBuiltInResource& resources = defaultResource); - + const std::string& source, + const std::string& entryPoint = "main", + std::vector> definitions = {}, + const TBuiltInResource& resources = defaultResource); }; - - } #endif // DKOMPUTE_DISABLE_SHADER_UTILS - diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 513525f73..7b24f3de7 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -54,7 +54,7 @@ class Tensor * otherwise there is no need to copy from host memory. */ void rebuild(const std::vector& data, - TensorTypes tensorType = TensorTypes::eDevice); + TensorTypes tensorType = TensorTypes::eDevice); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -125,9 +125,8 @@ class Tensor * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromStagingToDevice( - const vk::CommandBuffer& commandBuffer, - bool createBarrier); + void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, + bool createBarrier); /** * Records a copy from the internal device memory to the staging memory @@ -138,9 +137,8 @@ class Tensor * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromDeviceToStaging( - const vk::CommandBuffer& commandBuffer, - bool createBarrier); + void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer, + bool createBarrier); /** * Records the buffer memory barrier into the command buffer which @@ -152,12 +150,11 @@ class Tensor * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ - void recordBufferMemoryBarrier( - const vk::CommandBuffer& commandBuffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask); + void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask); /** * Constructs a vulkan descriptor buffer info which can be used to specify @@ -205,11 +202,11 @@ class Tensor std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, - vk::DeviceSize bufferSize, - vk::BufferCopy copyRegion, - bool createBarrier); + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index da9f8d887..b1919ce52 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -84,7 +84,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) for (uint32_t i = 0; i < numParallel; i++) { inputsAsyncB.push_back(mgr.tensor(data)); - algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv)); + algosAsync.push_back(mgr.algorithm({ inputsAsyncB[i] }, spirv)); } std::vector> sqs; @@ -160,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) sq1->eval({ tensorA, tensorB }); - std::shared_ptr algo1 = mgr.algorithm({tensorA}, spirv); - std::shared_ptr algo2 = mgr.algorithm({tensorB}, spirv); + std::shared_ptr algo1 = mgr.algorithm({ tensorA }, spirv); + std::shared_ptr algo2 = mgr.algorithm({ tensorB }, spirv); sq1->evalAsync(algo1); sq2->evalAsync(algo2); diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index cf753267e..fee3854c4 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -27,12 +27,12 @@ TEST(TestDestroy, TestDestroyTensorSingle) tensorA = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = - mgr.algorithm({ tensorA }, spirv); + mgr.algorithm({ tensorA }, spirv); mgr.sequence() - ->record(algo) - ->eval() - ->eval(algo->getTensors()); + ->record(algo) + ->eval() + ->eval(algo->getTensors()); tensorA->destroy(); EXPECT_FALSE(tensorA->isInit()); @@ -68,14 +68,14 @@ TEST(TestDestroy, TestDestroyTensorVector) tensorA = mgr.tensor({ 1, 1, 1 }); tensorB = mgr.tensor({ 1, 1, 1 }); - std::shared_ptr algo = - mgr.algorithm({tensorA, tensorB}, spirv); + std::shared_ptr algo = + mgr.algorithm({ tensorA, tensorB }, spirv); mgr.sequence() - ->record(algo->getTensors()) - ->record(algo) - ->record(algo->getTensors()) - ->eval(); + ->record(algo->getTensors()) + ->record(algo) + ->record(algo->getTensors()) + ->eval(); tensorA->destroy(); tensorB->destroy(); @@ -109,12 +109,13 @@ TEST(TestDestroy, TestDestroySequenceSingle) { kp::Manager mgr; - tensorA = mgr.tensor({0, 0, 0}); + tensorA = mgr.tensor({ 0, 0, 0 }); - sq = mgr.sequence() - ->record({tensorA}) - ->record(mgr.algorithm({tensorA}, spirv)) - ->record({tensorA}) + sq = + mgr.sequence() + ->record({ tensorA }) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record({ tensorA }) ->eval(); sq->destroy(); @@ -124,4 +125,3 @@ TEST(TestDestroy, TestDestroySequenceSingle) } EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); } - diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index f7ad9eda1..980273246 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -29,24 +29,27 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; mgr.sequence()->eval(params); std::vector spirv = std::vector( - (uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv, - (uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv + - kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len)); + (uint32_t*)kp::shader_data:: + test_shaders_glsl_test_logistic_regression_comp_spv, + (uint32_t*)(kp::shader_data:: + test_shaders_glsl_test_logistic_regression_comp_spv + + kp::shader_data:: + test_shaders_glsl_test_logistic_regression_comp_spv_len)); - std::shared_ptr algorithm = - mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0})); + std::shared_ptr algorithm = mgr.algorithm( + params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 })); std::shared_ptr sq = - mgr.sequence() - ->record({ wIn, bIn }) - ->record(algorithm) - ->record({ wOutI, wOutJ, bOut, lOut }); + mgr.sequence() + ->record({ wIn, bIn }) + ->record(algorithm) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -90,37 +93,38 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) std::shared_ptr y = mgr.tensor({ 0, 0, 0, 1, 1 }); - std::shared_ptr wIn = mgr.tensor( - { 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); + std::shared_ptr wIn = + mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); std::shared_ptr wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr bIn = mgr.tensor( - { 0 }, - kp::Tensor::TensorTypes::eHost); + std::shared_ptr bIn = + mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost); std::shared_ptr bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, - wIn, wOutI, wOutJ, - bIn, bOut, lOut }; + wIn, wOutI, wOutJ, + bIn, bOut, lOut }; mgr.sequence()->record(params)->eval(); std::vector spirv = std::vector( - (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, - (uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv + - kp::shader_data::shaders_glsl_logisticregression_comp_spv_len)); + (uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv, + (uint32_t*)(kp::shader_data:: + shaders_glsl_logisticregression_comp_spv + + kp::shader_data:: + shaders_glsl_logisticregression_comp_spv_len)); std::shared_ptr algorithm = - mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0})); + mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 })); std::shared_ptr sq = - mgr.sequence() - ->record({ wIn, bIn }) - ->record(algorithm) - ->record({ wOutI, wOutJ, bOut, lOut }); + mgr.sequence() + ->record({ wIn, bIn }) + ->record(algorithm) + ->record({ wOutI, wOutJ, bOut, lOut }); // Iterate across all expected iterations for (size_t i = 0; i < ITERATIONS; i++) { @@ -136,18 +140,18 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) bIn->mapDataIntoHostMemory(); } - // Based on the inputs the outputs should be at least: - // * wi < 0.01 - // * wj > 1.0 - // * b < 0 - // TODO: Add EXPECT_DOUBLE_EQ instead - EXPECT_LT(wIn->data()[0], 0.01); - EXPECT_GT(wIn->data()[1], 1.0); - EXPECT_LT(bIn->data()[0], 0.0); + // Based on the inputs the outputs should be at least: + // * wi < 0.01 + // * wj > 1.0 + // * b < 0 + // TODO: Add EXPECT_DOUBLE_EQ instead + EXPECT_LT(wIn->data()[0], 0.01); + EXPECT_GT(wIn->data()[1], 1.0); + EXPECT_LT(bIn->data()[0], 0.0); - KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", - wIn->data()[0], - wIn->data()[1], - bIn->data()[0]); + KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", + wIn->data()[0], + wIn->data()[1], + bIn->data()[0]); } } diff --git a/test/TestManager.cpp b/test/TestManager.cpp index f87e81159..ce055ff8c 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -11,13 +11,14 @@ TEST(TestManager, EndToEndOpMultEvalFlow) std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = - { tensorLHS, tensorRHS, tensorOutput }; + std::vector> params = { tensorLHS, + tensorRHS, + tensorOutput }; mgr.sequence() - ->eval(params) - ->eval(params, mgr.algorithm()) - ->eval(params); + ->eval(params) + ->eval(params, mgr.algorithm()) + ->eval(params); EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); } @@ -30,14 +31,15 @@ TEST(TestManager, EndToEndOpMultSeqFlow) std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = - { tensorLHS, tensorRHS, tensorOutput }; + std::vector> params = { tensorLHS, + tensorRHS, + tensorOutput }; mgr.sequence() - ->record(params) - ->record(params, mgr.algorithm()) - ->record(params) - ->eval(); + ->record(params) + ->record(params, mgr.algorithm()) + ->record(params) + ->eval(); EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); } @@ -50,8 +52,9 @@ TEST(TestManager, TestMultipleSequences) std::shared_ptr tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = - { tensorLHS, tensorRHS, tensorOutput }; + std::vector> params = { tensorLHS, + tensorRHS, + tensorOutput }; mgr.sequence()->eval(params); mgr.sequence()->eval(params, mgr.algorithm()); @@ -59,4 +62,3 @@ TEST(TestManager, TestMultipleSequences) EXPECT_EQ(tensorOutput->data(), std::vector({ 0, 4, 12 })); } - diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 8be2e6d82..e050e02ea 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -3,9 +3,10 @@ #include "kompute/Kompute.hpp" -TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) { +TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) +{ - kp::Manager mgr; + kp::Manager mgr; auto tensorInA = mgr.tensor({ 2., 2., 2. }); auto tensorInB = mgr.tensor({ 1., 2., 3. }); @@ -38,21 +39,24 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) { } )"); - std::vector> params = {tensorInA, tensorInB, tensorOutA, tensorOutB}; + std::vector> params = { + tensorInA, tensorInB, tensorOutA, tensorOutB + }; - kp::Workgroup workgroup({3, 1, 1}); + kp::Workgroup workgroup({ 3, 1, 1 }); kp::Constants specConsts({ 2 }); kp::Constants pushConstsA({ 2.0 }); kp::Constants pushConstsB({ 3.0 }); - auto algorithm = mgr.algorithm(params, kp::Shader::compile_source(shader), workgroup, specConsts); + auto algorithm = mgr.algorithm( + params, kp::Shader::compile_source(shader), workgroup, specConsts); // 3. Run operation with string shader synchronously mgr.sequence() - ->record(params) - ->record(algorithm, pushConstsA) - ->record(algorithm, pushConstsB) - ->eval(); + ->record(params) + ->record(algorithm, pushConstsA) + ->record(algorithm, pushConstsB) + ->eval(); auto sq = mgr.sequence(); sq->evalAsync(params); @@ -83,12 +87,12 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) { mgr.sequence() - ->record({ tensorA }) - ->record(mgr.algorithm({tensorA}, spirv)) - ->record(mgr.algorithm({tensorA}, spirv)) - ->record(mgr.algorithm({tensorA}, spirv)) - ->record({ tensorA }) - ->eval(); + ->record({ tensorA }) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record({ tensorA }) + ->eval(); } EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); @@ -111,29 +115,20 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) std::vector spirv = kp::Shader::compile_source(shader); - std::shared_ptr algorithm = mgr.algorithm({tensorA}, spirv); + std::shared_ptr algorithm = + mgr.algorithm({ tensorA }, spirv); std::shared_ptr sq = mgr.sequence(); - mgr.sequence() - ->record({ tensorA }) - ->eval(); + mgr.sequence()->record({ tensorA })->eval(); - mgr.sequence() - ->record(algorithm) - ->eval(); + mgr.sequence()->record(algorithm)->eval(); - mgr.sequence() - ->record(algorithm) - ->eval(); + mgr.sequence()->record(algorithm)->eval(); - mgr.sequence() - ->record(algorithm) - ->eval(); + mgr.sequence()->record(algorithm)->eval(); - mgr.sequence() - ->record({ tensorA }) - ->eval(); + mgr.sequence()->record({ tensorA })->eval(); EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); } @@ -156,23 +151,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) std::vector spirv = kp::Shader::compile_source(shader); - std::shared_ptr algorithm = mgr.algorithm({tensorA}, spirv); + std::shared_ptr algorithm = + mgr.algorithm({ tensorA }, spirv); std::shared_ptr sq = mgr.sequence(); sq->record({ tensorA })->eval(); - sq->record(algorithm) - ->eval(); + sq->record(algorithm)->eval(); - sq->record(algorithm) - ->eval(); + sq->record(algorithm)->eval(); - sq->record(algorithm) - ->eval(); + sq->record(algorithm)->eval(); - sq->record({ tensorA }) - ->eval(); + sq->record({ tensorA })->eval(); EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); } @@ -194,24 +186,20 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) std::vector spirv = kp::Shader::compile_source(shader); - std::shared_ptr algorithm = mgr.algorithm({tensorA}, spirv); + std::shared_ptr algorithm = + mgr.algorithm({ tensorA }, spirv); std::shared_ptr sq = mgr.sequence(); sq->record({ tensorA })->eval(); - sq->record(algorithm) - ->eval() - ->eval() - ->eval(); + sq->record(algorithm)->eval()->eval()->eval(); - sq->record({ tensorA }) - ->eval(); + sq->record({ tensorA })->eval(); EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); } - TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) { std::shared_ptr tensorA = nullptr; @@ -234,22 +222,18 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) std::vector spirv = kp::Shader::compile_source(shader); - std::shared_ptr algorithm = mgr.algorithm({tensorA}, spirv); + std::shared_ptr algorithm = + mgr.algorithm({ tensorA }, spirv); sq = mgr.sequence(); sq->record({ tensorA })->eval(); - sq->record(algorithm) - ->eval() - ->eval() - ->eval(); + sq->record(algorithm)->eval()->eval()->eval(); - sq->record({ tensorA }) - ->eval(); + sq->record({ tensorA })->eval(); } } EXPECT_EQ(tensorA->data(), std::vector({ 3, 3, 3 })); } - diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 1a1c5c599..3e6856a21 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -32,10 +32,9 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor) std::vector> params = { tensorA, tensorB }; mgr.sequence() - ->eval(params) - ->eval(mgr.algorithm(params, spirv)) - ->eval(params); - + ->eval(params) + ->eval(mgr.algorithm(params, spirv)) + ->eval(params); EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); @@ -48,27 +47,27 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA = mgr.tensor({ 3, 4, 5 }); std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); - std::vector spirv = - std::vector( - (uint32_t*)kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv, - (uint32_t*)(kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv + - kp::shader_data:: - test_shaders_glsl_test_op_custom_shader_comp_spv_len)); + std::vector spirv = std::vector( + (uint32_t*) + kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv, + (uint32_t*)(kp::shader_data:: + test_shaders_glsl_test_op_custom_shader_comp_spv + + kp::shader_data:: + test_shaders_glsl_test_op_custom_shader_comp_spv_len)); std::vector> params = { tensorA, tensorB }; mgr.sequence() - ->eval(params) - ->eval(mgr.algorithm(params, spirv)) - ->eval(params); - + ->eval(params) + ->eval(mgr.algorithm(params, spirv)) + ->eval(params); EXPECT_EQ(tensorA->data(), std::vector({ 0, 1, 2 })); EXPECT_EQ(tensorB->data(), std::vector({ 3, 4, 5 })); } // TODO: Add support to read from file for shader -//TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile) +// TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile) //{ // kp::Manager mgr; // @@ -77,7 +76,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) // mgr.rebuild({ tensorA, tensorB }); // // mgr.evalOpDefault( -// { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); +// { tensorA, tensorB }, +// "test/shaders/glsl/test_op_custom_shader.comp.spv"); // // mgr.evalOpDefault({ tensorA, tensorB }); // diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index dc82a2e50..85e0b545b 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -18,9 +18,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) EXPECT_TRUE(tensorB->isInit()); mgr.sequence() - ->eval({ tensorA, tensorB }) - ->eval({ tensorA, tensorB }) - ->eval({ tensorA, tensorB }); + ->eval({ tensorA, tensorB }) + ->eval({ tensorA, tensorB }) + ->eval({ tensorA, tensorB }); // Making sure the GPU holds the same data EXPECT_EQ(tensorA->data(), tensorB->data()); @@ -44,15 +44,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) EXPECT_TRUE(tensorC->isInit()); mgr.sequence() - ->eval({tensorA, tensorB, tensorC}) - ->eval({tensorA, tensorB, tensorC }); + ->eval({ tensorA, tensorB, tensorC }) + ->eval({ tensorA, tensorB, tensorC }); EXPECT_EQ(tensorA->data(), tensorB->data()); EXPECT_EQ(tensorA->data(), tensorC->data()); // Making sure the GPU holds the same data - mgr.sequence() - ->eval({ tensorB, tensorC }); + mgr.sequence()->eval({ tensorB, tensorC }); EXPECT_EQ(tensorA->data(), tensorB->data()); EXPECT_EQ(tensorA->data(), tensorC->data()); @@ -67,8 +66,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA = mgr.tensor(testVecA); - std::shared_ptr tensorB = mgr.tensor( - testVecB, kp::Tensor::TensorTypes::eHost); + std::shared_ptr tensorB = + mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); // Only calling sync on device type tensor mgr.sequence()->eval({ tensorA }); @@ -93,8 +92,8 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor( - testVecA, kp::Tensor::TensorTypes::eHost); + std::shared_ptr tensorA = + mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); std::shared_ptr tensorB = mgr.tensor(testVecB); // Only calling sync on device type tensor @@ -120,17 +119,17 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; - std::shared_ptr tensorA = mgr.tensor( - testVecA, kp::Tensor::TensorTypes::eHost); - std::shared_ptr tensorB = mgr.tensor( - testVecB, kp::Tensor::TensorTypes::eHost); + std::shared_ptr tensorA = + mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); + std::shared_ptr tensorB = + mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); mgr.sequence() - ->eval({ tensorA }) - ->eval({ tensorA, tensorB }); + ->eval({ tensorA }) + ->eval({ tensorA, tensorB }); EXPECT_EQ(tensorA->data(), tensorB->data()); @@ -146,8 +145,8 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::vector testVecA{ 6, 7, 8 }; - std::shared_ptr tensorA = mgr.tensor( - testVecA, kp::Tensor::TensorTypes::eHost); + std::shared_ptr tensorA = + mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 78ba57c00..ae8cf4a32 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -32,16 +32,18 @@ TEST(TestPushConstants, TestTwoConstants) std::shared_ptr tensor = mgr.tensor({ 0, 0, 0 }); std::shared_ptr algo = - mgr.algorithm({tensor}, spirv, kp::Workgroup({1})); + mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 })); sq = mgr.sequence() - ->record({tensor}) - ->record(algo, kp::Constants{0.1, 0.2, 0.3}) - ->record(algo, kp::Constants{0.3, 0.2, 0.1}) - ->record({tensor}) - ->eval(); + ->record({ tensor }) + ->record(algo, + kp::Constants{ 0.1, 0.2, 0.3 }) + ->record(algo, + kp::Constants{ 0.3, 0.2, 0.1 }) + ->record({ tensor }) + ->eval(); - EXPECT_EQ(tensor->data(), kp::Constants({0.4, 0.4, 0.4})); + EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 })); } } } diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 7a8dd07d1..4d0233694 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -17,4 +17,3 @@ TEST(TestSequence, SequenceDestructorViaManager) EXPECT_FALSE(sq->isInit()); } - diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index d1e4cdc9e..e66f9d52e 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -28,17 +28,19 @@ TEST(TestSpecializationConstants, TestTwoConstants) std::shared_ptr tensorA = mgr.tensor({ 0, 0, 0 }); std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); - std::vector> params = {tensorA, tensorB}; + std::vector> params = { tensorA, + tensorB }; - kp::Constants spec = kp::Constants({5.0, 0.3}); + kp::Constants spec = kp::Constants({ 5.0, 0.3 }); - std::shared_ptr algo = mgr.algorithm(params, spirv, {}, spec); + std::shared_ptr algo = + mgr.algorithm(params, spirv, {}, spec); sq = mgr.sequence() - ->record(params) - ->record(algo) - ->record(params) - ->eval(); + ->record(params) + ->record(algo) + ->record(params) + ->eval(); EXPECT_EQ(tensorA->data(), std::vector({ 5, 5, 5 })); EXPECT_EQ(tensorB->data(), std::vector({ 0.3, 0.3, 0.3 })); diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 76ecbe60d..d33367722 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -11,4 +11,3 @@ TEST(TestTensor, ConstructorData) EXPECT_EQ(tensor->size(), vec.size()); EXPECT_EQ(tensor->data(), vec); } - diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 5da0a4c54..3eb9147a1 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -18,16 +18,21 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) tensorA = mgr.tensor(std::vector(16 * 8)); tensorB = mgr.tensor(std::vector(16 * 8)); - std::vector> params = {tensorA, tensorB}; + std::vector> params = { tensorA, + tensorB }; std::vector spirv( - (uint32_t*)kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv, - (uint32_t*)(kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv + - kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv_len)); + (uint32_t*) + kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv, + (uint32_t*)(kp::shader_data:: + test_shaders_glsl_test_workgroup_comp_spv + + kp::shader_data:: + test_shaders_glsl_test_workgroup_comp_spv_len)); - kp::Workgroup workgroup = {16, 8, 1}; + kp::Workgroup workgroup = { 16, 8, 1 }; - std::shared_ptr algorithm = mgr.algorithm(params, spirv, workgroup); + std::shared_ptr algorithm = + mgr.algorithm(params, spirv, workgroup); sq = mgr.sequence(); sq->record(params); @@ -37,11 +42,26 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) } } - std::vector expectedA = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15}; + std::vector expectedA = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15 + }; - std::vector expectedB = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }; + std::vector expectedB = { + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, + 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, + 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + }; EXPECT_EQ(tensorA->data(), expectedA); EXPECT_EQ(tensorB->data(), expectedB); } -