diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 90066e8cc..0ddf3e32a 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -55,7 +55,7 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte atomicAdd(pa[2], pcs.z); })"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr sq = nullptr; @@ -102,7 +102,7 @@ We also provide tools that allow you to `convert shaders into C++ headers spirv = kp::Shader::compile_source(R"( + std::vector spirv = kp::Shader::compileSource(R"( #version 450 layout(set = 0, binding = 0) buffer tensorLhs { @@ -215,7 +215,7 @@ In this case we create a shader that should take a couple of milliseconds to run } )"); - auto algo = mgr.algorithm({tensor}, kp::Shader::compile_source(shader)); + auto algo = mgr.algorithm({tensor}, kp::Shader::compileSource(shader)); Now we are able to run the await function on the default sequence. @@ -361,7 +361,7 @@ Similar to the asyncrhonous usecase above, we can still run synchronous commands } )"); - std::vector spirv = kp::Shader::compile_source(shader); + std::vector spirv = kp::Shader::compileSource(shader); std::shared_ptr algo = mgr.algorithm({tensorA, tenssorB}, spirv); diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index d5263628b..1b34b35e6 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -20,7 +20,8 @@ Algorithm::Algorithm(std::shared_ptr device, "spirv size: {}", tensors.size(), spirv.size()); - this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants); + this->rebuild( + tensors, spirv, workgroup, specializationConstants, pushConstants); } else { KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or " "spirv so not rebuilding vulkan components"); @@ -425,15 +426,18 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) } void -Algorithm::setPush(const Constants& pushConstants) { +Algorithm::setPush(const Constants& pushConstants) +{ - if (pushConstants.size() != this->mPushConstants.size()) { - throw std::runtime_error(fmt::format("Kompute Algorithm push " - "constant provided is size {} but expected size {}", - pushConstants.size(), this->mPushConstants.size())); - } + if (pushConstants.size() != this->mPushConstants.size()) { + throw std::runtime_error( + fmt::format("Kompute Algorithm push " + "constant provided is size {} but expected size {}", + pushConstants.size(), + this->mPushConstants.size())); + } - this->mPushConstants = pushConstants; + this->mPushConstants = pushConstants; } const Workgroup& @@ -449,7 +453,8 @@ Algorithm::getSpecializationConstants() } const Constants& -Algorithm::getPush() { +Algorithm::getPush() +{ return this->mPushConstants; } diff --git a/src/Manager.cpp b/src/Manager.cpp index 5d6bf4cd4..cdc896332 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -1,8 +1,8 @@ -#include -#include -#include #include +#include +#include +#include #include "kompute/Manager.hpp" @@ -39,7 +39,8 @@ Manager::Manager(uint32_t physicalDeviceIndex, this->mManageResources = true; this->createInstance(); - this->createDevice(familyQueueIndices, physicalDeviceIndex, desiredExtensions); + this->createDevice( + familyQueueIndices, physicalDeviceIndex, desiredExtensions); } Manager::Manager(std::shared_ptr instance, @@ -177,7 +178,8 @@ Manager::createInstance() }; std::vector envLayerNames; const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS"); - KP_LOG_DEBUG("Kompute Manager adding environment layers: {}", envLayerNamesVal); + KP_LOG_DEBUG("Kompute Manager adding environment layers: {}", + envLayerNamesVal); if (envLayerNamesVal != NULL && *envLayerNamesVal != '\0') { std::istringstream iss(envLayerNamesVal); std::istream_iterator beg(iss), end; @@ -206,13 +208,15 @@ Manager::createInstance() } if (validLayerNames.size() > 0) { - KP_LOG_DEBUG("Kompute Manager Initializing instance with valid layers: {}", validLayerNames); + KP_LOG_DEBUG( + "Kompute Manager Initializing instance with valid layers: {}", + validLayerNames); computeInstanceCreateInfo.enabledLayerCount = (uint32_t)validLayerNames.size(); computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data(); - } - else { - KP_LOG_WARN("Kompute Manager no valid layer names found from desired layer names"); + } else { + KP_LOG_WARN("Kompute Manager no valid layer names found from desired " + "layer names"); } #endif #endif @@ -347,16 +351,19 @@ Manager::createDevice(const std::vector& familyQueueIndices, deviceQueueCreateInfos.push_back(deviceQueueCreateInfo); } - KP_LOG_DEBUG("Kompute Manager desired extension layers {}", desiredExtensions); + KP_LOG_DEBUG("Kompute Manager desired extension layers {}", + desiredExtensions); - std::vector deviceExtensions = this->mPhysicalDevice->enumerateDeviceExtensionProperties(); + std::vector deviceExtensions = + this->mPhysicalDevice->enumerateDeviceExtensionProperties(); std::set uniqueExtensionNames; for (const vk::ExtensionProperties& ext : deviceExtensions) { std::string extName(ext.extensionName.data()); uniqueExtensionNames.insert(extName); } - KP_LOG_DEBUG("Kompute Manager available extensions {}", uniqueExtensionNames); + KP_LOG_DEBUG("Kompute Manager available extensions {}", + uniqueExtensionNames); std::vector validExtensions; for (std::string ext : desiredExtensions) { if (uniqueExtensionNames.count(ext) != 0) { @@ -364,7 +371,8 @@ Manager::createDevice(const std::vector& familyQueueIndices, } } if (desiredExtensions.size() != validExtensions.size()) { - KP_LOG_ERROR("Kompute Manager not all extensions were added: {}", validExtensions); + KP_LOG_ERROR("Kompute Manager not all extensions were added: {}", + validExtensions); } vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(), @@ -406,7 +414,12 @@ Manager::algorithm(const std::vector>& tensors, KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); std::shared_ptr algorithm{ new kp::Algorithm( - this->mDevice, tensors, spirv, workgroup, specializationConstants, pushConstants) }; + this->mDevice, + tensors, + spirv, + workgroup, + specializationConstants, + pushConstants) }; if (this->mManageResources) { this->mManagedAlgorithms.push_back(algorithm); diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index c93830902..33b9eb838 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -18,13 +18,16 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) uint32_t size = this->mTensors[0]->size(); for (const std::shared_ptr& tensor : tensors) { if (tensor->dataType() != dataType) { - throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}", - dataType, tensor->dataType())); + throw std::runtime_error(fmt::format( + "Attempting to copy tensors of different types from {} to {}", + dataType, + tensor->dataType())); } if (tensor->size() != size) { - throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}", - size, tensor->size())); - + throw std::runtime_error(fmt::format( + "Attempting to copy tensors of different sizes from {} to {}", + size, + tensor->size())); } } } diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 4dbfaec83..cd887c148 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -40,7 +40,6 @@ void OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called"); - } void diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 6e379eb92..4db458288 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -18,8 +18,9 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->createCommandPool(); this->createCommandBuffer(); - if(totalTimestamps>0) - this->createTimestampQueryPool(totalTimestamps+1); //+1 for the first one + if (totalTimestamps > 0) + this->createTimestampQueryPool(totalTimestamps + + 1); //+1 for the first one } Sequence::~Sequence() @@ -48,12 +49,12 @@ Sequence::begin() this->mCommandBuffer->begin(vk::CommandBufferBeginInfo()); this->mRecording = true; - //latch the first timestamp before any commands are submitted - if(this->timestampQueryPool) + // latch the first timestamp before any commands are submitted + if (this->timestampQueryPool) this->mCommandBuffer->writeTimestamp( - vk::PipelineStageFlagBits::eAllCommands, - *this->timestampQueryPool, 0 - ); + vk::PipelineStageFlagBits::eAllCommands, + *this->timestampQueryPool, + 0); } void @@ -246,12 +247,12 @@ Sequence::destroy() this->mOperations.clear(); } - if(this->timestampQueryPool){ + if (this->timestampQueryPool) { KP_LOG_INFO("Destroying QueryPool"); this->mDevice->destroy( - *this->timestampQueryPool, - (vk::Optional)nullptr); - + *this->timestampQueryPool, + (vk::Optional)nullptr); + this->timestampQueryPool = nullptr; KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool"); } @@ -281,12 +282,12 @@ Sequence::record(std::shared_ptr op) this->mOperations.push_back(op); - if(this->timestampQueryPool) - this->mCommandBuffer->writeTimestamp( - vk::PipelineStageFlagBits::eAllCommands, - *this->timestampQueryPool, this->mOperations.size() - ); - + if (this->timestampQueryPool) + this->mCommandBuffer->writeTimestamp( + vk::PipelineStageFlagBits::eAllCommands, + *this->timestampQueryPool, + this->mOperations.size()); + return shared_from_this(); } @@ -339,7 +340,8 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Sequence creating query pool"); if (!this->isInit()) { - throw std::runtime_error("createTimestampQueryPool() called on uninitialized Sequence"); + throw std::runtime_error( + "createTimestampQueryPool() called on uninitialized Sequence"); } if (!this->mPhysicalDevice) { throw std::runtime_error("Kompute Sequence physical device is null"); @@ -347,16 +349,16 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps) vk::PhysicalDeviceProperties physicalDeviceProperties = this->mPhysicalDevice->getProperties(); - - if(physicalDeviceProperties.limits.timestampComputeAndGraphics){ + + if (physicalDeviceProperties.limits.timestampComputeAndGraphics) { vk::QueryPoolCreateInfo queryPoolInfo; queryPoolInfo.setQueryCount(totalTimestamps); queryPoolInfo.setQueryType(vk::QueryType::eTimestamp); - this->timestampQueryPool = std::make_shared(this->mDevice->createQueryPool(queryPoolInfo)); + this->timestampQueryPool = std::make_shared( + this->mDevice->createQueryPool(queryPoolInfo)); KP_LOG_DEBUG("Query pool for timestamps created"); - } - else{ + } else { throw std::runtime_error("Device does not support timestamps"); } } @@ -364,14 +366,19 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps) std::vector Sequence::getTimestamps() { - if(!this->timestampQueryPool) + if (!this->timestampQueryPool) throw std::runtime_error("Timestamp latching not enabled"); - - const auto n = this->mOperations.size()+1; + + const auto n = this->mOperations.size() + 1; std::vector timestamps(n, 0); - this->mDevice->getQueryPoolResults(*this->timestampQueryPool, - 0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(), - sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); + this->mDevice->getQueryPoolResults( + *this->timestampQueryPool, + 0, + n, + timestamps.size() * sizeof(std::uint64_t), + timestamps.data(), + sizeof(uint64_t), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); return timestamps; } diff --git a/src/Shader.cpp b/src/Shader.cpp index bedac0165..293752a9a 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -99,10 +99,10 @@ Shader::compileSource( const TBuiltInResource& resource) { return compileSources({ source }, - std::vector({}), - entryPoint, - definitions, - resource); + std::vector({}), + entryPoint, + definitions, + resource); } const TBuiltInResource Shader::defaultResource = { diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 947714693..90c21fc8a 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -64,13 +64,10 @@ Tensor::tensorType() bool Tensor::isInit() { - return this->mDevice - && this->mPrimaryBuffer - && this->mPrimaryMemory - && this->mRawData; + return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory && + this->mRawData; } - void Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, std::shared_ptr copyFromTensor, @@ -175,7 +172,8 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() { - KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); + KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", + this->memorySize()); vk::DeviceSize bufferSize = this->memorySize(); return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, 0, // offset @@ -213,7 +211,7 @@ Tensor::getPrimaryMemoryPropertyFlags() break; case TensorTypes::eHost: return vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent; + vk::MemoryPropertyFlagBits::eHostCoherent; break; case TensorTypes::eStorage: return vk::MemoryPropertyFlagBits::eDeviceLocal; @@ -363,7 +361,8 @@ Tensor::destroy() { KP_LOG_DEBUG("Kompute Tensor started destroy()"); - // Setting raw data to null regardless whether device is available to invalidate Tensor + // Setting raw data to null regardless whether device is available to + // invalidate Tensor this->mRawData = nullptr; this->mSize = 0; this->mDataTypeMemorySize = 0; @@ -442,31 +441,36 @@ Tensor::destroy() template<> Tensor::TensorDataTypes -TensorT::dataType() { +TensorT::dataType() +{ return Tensor::TensorDataTypes::eBool; } template<> Tensor::TensorDataTypes -TensorT::dataType() { +TensorT::dataType() +{ return Tensor::TensorDataTypes::eInt; } template<> Tensor::TensorDataTypes -TensorT::dataType() { +TensorT::dataType() +{ return Tensor::TensorDataTypes::eUnsignedInt; } template<> Tensor::TensorDataTypes -TensorT::dataType() { +TensorT::dataType() +{ return Tensor::TensorDataTypes::eFloat; } template<> Tensor::TensorDataTypes -TensorT::dataType() { +TensorT::dataType() +{ return Tensor::TensorDataTypes::eDouble; } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index fae9cfd4b..ecbd36aa6 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -18,15 +18,17 @@ class Algorithm * the underlying resources. * * @param device The Vulkan device to use for creating resources - * @param tensors (optional) The tensors to use to create the descriptor resources + * @param tensors (optional) The tensors to use to create the descriptor + * resources * @param spirv (optional) The spirv code to use to create the algorithm - * @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to - * kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to initialize - * the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when initializing the - * pipeline, which set the size of the push constants - these can be modified but - * all new values must have the same vector size as this initial value. + * @param workgroup (optional) The kp::Workgroup to use for the dispatch + * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. + * @param specializationConstants (optional) The kp::Constants to use to + * initialize the specialization constants which cannot be changed once set. + * @param pushConstants (optional) The kp::Constants to use when + * initializing the pipeline, which set the size of the push constants - + * these can be modified but all new values must have the same vector size + * as this initial value. */ Algorithm(std::shared_ptr device, const std::vector>& tensors = {}, @@ -36,18 +38,19 @@ class Algorithm const Constants& pushConstants = {}); /** - * Rebuild function to reconstruct algorithm with configuration parameters to create - * the underlying resources. + * Rebuild function to reconstruct algorithm with configuration parameters + * to create the underlying resources. * * @param tensors The tensors to use to create the descriptor resources * @param spirv The spirv code to use to create the algorithm - * @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to - * kp::Workgroup(tensor[0].size(), 1, 1) if not set. - * @param specializationConstants (optional) The kp::Constants to use to initialize - * the specialization constants which cannot be changed once set. - * @param pushConstants (optional) The kp::Constants to use when initializing the - * pipeline, which set the size of the push constants - these can be modified but - * all new values must have the same vector size as this initial value. + * @param workgroup (optional) The kp::Workgroup to use for the dispatch + * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. + * @param specializationConstants (optional) The kp::Constants to use to + * initialize the specialization constants which cannot be changed once set. + * @param pushConstants (optional) The kp::Constants to use when + * initializing the pipeline, which set the size of the push constants - + * these can be modified but all new values must have the same vector size + * as this initial value. */ void rebuild(const std::vector>& tensors, const std::vector& spirv, @@ -70,25 +73,26 @@ class Algorithm void recordDispatch(const vk::CommandBuffer& commandBuffer); /** - * Records command that binds the "core" algorithm components which consist of - * binding the pipeline and binding the descriptorsets. + * Records command that binds the "core" algorithm components which consist + * of binding the pipeline and binding the descriptorsets. * * @param commandBuffer Command buffer to record the algorithm resources to */ void recordBindCore(const vk::CommandBuffer& commandBuffer); /** - * Records command that binds the push constants to the command buffer provided - * - it is required that the pushConstants provided are of the same size as the - * ones provided during initialization. + * Records command that binds the push constants to the command buffer + * provided + * - it is required that the pushConstants provided are of the same size as + * the ones provided during initialization. * * @param commandBuffer Command buffer to record the algorithm resources to */ void recordBindPush(const vk::CommandBuffer& commandBuffer); /** - * function that checks all the gpu resource components to verify if these have - * been created and returns true if all are valid. + * function that checks all the gpu resource components to verify if these + * have been created and returns true if all are valid. * * @returns returns true if the algorithm is currently initialized. */ @@ -97,26 +101,28 @@ class Algorithm /** * Sets the work group to use in the recordDispatch * - * @param workgroup The kp::Workgroup value to use to update the algorithm. It - * must have a value greater than 1 on the x value (index 1) otherwise it will - * be initialized on the size of the first tensor (ie. this->mTensor[0]->size()) + * @param workgroup The kp::Workgroup value to use to update the algorithm. + * It must have a value greater than 1 on the x value (index 1) otherwise it + * will be initialized on the size of the first tensor (ie. + * this->mTensor[0]->size()) */ void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1); /** - * Sets the push constants to the new value provided to use in the next bindPush() + * Sets the push constants to the new value provided to use in the next + * bindPush() * - * @param The kp::Constant to use to set the push constants to use in the next - * bindPush(...) calls. The constants provided must be of the same size as the - * ones created during initialization. + * @param The kp::Constant to use to set the push constants to use in the + * next bindPush(...) calls. The constants provided must be of the same size + * as the ones created during initialization. */ void setPush(const Constants& pushConstants); /** * Gets the current workgroup from the algorithm. * - * @param The kp::Constant to use to set the push constants to use in the next - * bindPush(...) calls. The constants provided must be of the same size as the - * ones created during initialization. + * @param The kp::Constant to use to set the push constants to use in the + * next bindPush(...) calls. The constants provided must be of the same size + * as the ones created during initialization. */ const Workgroup& getWorkgroup(); /** diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 6eb2042eb..6b06b83fd 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -24,13 +24,14 @@ class Manager Manager(); /** - * Similar to base constructor but allows for further configuration to use when - * creating the Vulkan resources. + * Similar to base constructor but allows for further configuration to use + * when creating the Vulkan resources. * * @param physicalDeviceIndex The index of the physical device to use * @param familyQueueIndices (Optional) List of queue indices to add for * explicit allocation - * @param desiredExtensions The desired extensions to load from physicalDevice + * @param desiredExtensions The desired extensions to load from + * physicalDevice */ Manager(uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices = {}, @@ -64,7 +65,8 @@ class Manager * If zero (default), disables latching of timestamps. * @returns Shared pointer with initialised sequence */ - std::shared_ptr sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, + uint32_t totalTimestamps = 0); /** * Create a managed tensor that will be destroyed by this manager @@ -74,7 +76,7 @@ class Manager * @param tensorType The type of tensor to initialize * @returns Shared pointer with initialised tensor */ - template + template std::shared_ptr> tensorT( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) @@ -105,8 +107,13 @@ class Manager const Tensor::TensorDataTypes& dataType, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { - std::shared_ptr tensor{ new kp::Tensor( - this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) }; + std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, + this->mDevice, + data, + elementTotalCount, + elementMemorySize, + dataType, + tensorType) }; if (this->mManageResources) { this->mManagedTensors.push_back(tensor); diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index d29f6aaf0..6eeb265c1 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -2,8 +2,8 @@ #include "kompute/Core.hpp" -#include "kompute/operations/OpBase.hpp" #include "kompute/operations/OpAlgoDispatch.hpp" +#include "kompute/operations/OpBase.hpp" namespace kp { @@ -40,8 +40,8 @@ class Sequence : public std::enable_shared_from_this * function also requires the Sequence to be recording, otherwise it will * not be able to add the operation. * - * @param op Object derived from kp::BaseOp that will be recoreded by the sequence - * which will be used when the operation is evaluated. + * @param op Object derived from kp::BaseOp that will be recoreded by the + * sequence which will be used when the operation is evaluated. * @return shared_ptr of the Sequence class itself */ std::shared_ptr record(std::shared_ptr op); @@ -59,7 +59,8 @@ class Sequence : public std::enable_shared_from_this */ template std::shared_ptr record( - std::vector> tensors, TArgs&&... params) + std::vector> tensors, + TArgs&&... params) { std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->record(op); @@ -94,8 +95,9 @@ class Sequence : public std::enable_shared_from_this std::shared_ptr eval(); /** - * Resets all the recorded and stored operations, records the operation - * provided and submits into the gpu as a submit job synchronously (with a barrier). + * Resets all the recorded and stored operations, records the operation + * provided and submits into the gpu as a submit job synchronously (with a + * barrier). * * @return shared_ptr of the Sequence class itself */ @@ -138,16 +140,18 @@ class Sequence : public std::enable_shared_from_this /** * Eval Async sends all the recorded and stored operations in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() must - * ALWAYS be called after to ensure the sequence is terminated correctly. + * operations into the gpu as a submit job without a barrier. EvalAwait() + * must ALWAYS be called after to ensure the sequence is terminated + * correctly. * * @return Boolean stating whether execution was successful. */ std::shared_ptr evalAsync(); /** * Clears currnet operations to record provided one in the vector of - * operations into the gpu as a submit job without a barrier. EvalAwait() must - * ALWAYS be called after to ensure the sequence is terminated correctly. + * operations into the gpu as a submit job without a barrier. EvalAwait() + * must ALWAYS be called after to ensure the sequence is terminated + * correctly. * * @return Boolean stating whether execution was successful. */ @@ -241,9 +245,9 @@ class Sequence : public std::enable_shared_from_this bool isInit(); /** - * Clears command buffer and triggers re-record of all the current operations - * saved, which is useful if the underlying kp::Tensors or kp::Algorithms - * are modified and need to be re-recorded. + * Clears command buffer and triggers re-record of all the current + * operations saved, which is useful if the underlying kp::Tensors or + * kp::Algorithms are modified and need to be re-recorded. */ void rerecord(); diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp index 9ecab24cd..2cd240424 100644 --- a/src/include/kompute/Shader.hpp +++ b/src/include/kompute/Shader.hpp @@ -18,7 +18,6 @@ namespace kp { class Shader { public: - // The default resource limit for the GLSL compiler, can be overwritten // Has been adopted by: // https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 0194e208f..dc4a4f51a 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -160,41 +160,33 @@ class Tensor * @return Unsigned integer representing the total number of elements */ // TODO: move to cpp - uint32_t size() { - return this->mSize; - } + uint32_t size() { return this->mSize; } // TODO: move to cpp - uint32_t dataTypeMemorySize() { - return this->mDataTypeMemorySize; - } + uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; } // TODO: move to cpp - uint32_t memorySize() { - return this->mSize * this->mDataTypeMemorySize; - } + uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; } /** * Retrieve the underlying data type of the Tensor * * @return Data type of tensor of type kp::Tensor::TensorDataTypes */ - TensorDataTypes dataType() { - return this->mDataType; - } + TensorDataTypes dataType() { return this->mDataType; } - void* rawData() { - return this->mRawData; - } + void* rawData() { return this->mRawData; } // TODO: move to cpp - template - T* data() { + template + T* data() + { return (T*)this->mRawData; } - template - std::vector vector() { + template + std::vector vector() + { return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } @@ -202,9 +194,9 @@ class Tensor * Sets / resets the vector data of the tensor. This function does not * perform any copies into GPU memory and is only performed on the host. */ - void setRawData(const void* data) + void setRawData(const void* data) { - // Copy data + // Copy data memcpy(this->mRawData, data, this->memorySize()); } @@ -217,7 +209,8 @@ class Tensor void* mRawData; private: - void mapRawData() { + void mapRawData() + { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -235,14 +228,17 @@ class Tensor vk::DeviceSize bufferSize = this->memorySize(); - // Given we request coherent host memory we don't need to invalidate / flush + // Given we request coherent host memory we don't need to invalidate / + // flush this->mRawData = this->mDevice->mapMemory( *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize); + vk::MappedMemoryRange mappedMemoryRange( + *hostVisibleMemory, 0, bufferSize); } - void unmapRawData() { + void unmapRawData() + { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); @@ -296,49 +292,46 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - }; // TODO: Limit T to be only float, bool, double, etc -template -class TensorT: public Tensor +template +class TensorT : public Tensor { public: TensorT(std::shared_ptr physicalDevice, - std::shared_ptr device, - const std::vector& data, - const TensorTypes& tensorType = TensorTypes::eDevice) - : Tensor(physicalDevice, - device, - (void*)data.data(), - data.size(), - sizeof(T), - this->dataType(), - tensorType) + std::shared_ptr device, + const std::vector& data, + const TensorTypes& tensorType = TensorTypes::eDevice) + : Tensor(physicalDevice, + device, + (void*)data.data(), + data.size(), + sizeof(T), + this->dataType(), + tensorType) { - KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size()); + KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", + data.size()); } - ~TensorT() { - KP_LOG_DEBUG("Kompute TensorT destructor"); - } + ~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); } - T* data() { - return (T*)this->mRawData; - } + T* data() { return (T*)this->mRawData; } - std::vector vector() { + std::vector vector() + { return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; } - T& operator[](int index) { - return *(((T*)this->mRawData) + index); - } + T& operator[](int index) { return *(((T*)this->mRawData) + index); } - void setData(const std::vector& data) { + void setData(const std::vector& data) + { - KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size()); + KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", + data.size()); if (data.size() != this->mSize) { throw std::runtime_error( @@ -349,7 +342,6 @@ class TensorT: public Tensor } TensorDataTypes dataType(); - }; } // End namespace kp diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index defd40998..36127f84e 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -7,7 +7,7 @@ TEST(TestDestroy, TestDestroyTensorSingle) { std::shared_ptr> tensorA = nullptr; - std::string shader(R"( + std::string shader(R"( #version 450 layout (local_size_x = 1) in; layout(set = 0, binding = 0) buffer a { float pa[]; }; diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index a4402637f..71eeaafde 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -20,13 +20,17 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }); - std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutI = + mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = + mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr> bIn = mgr.tensor({ 0 }); - std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bOut = + mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = + mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, @@ -95,14 +99,18 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) std::shared_ptr> wIn = mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutI = + mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> wOutJ = + mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr> bIn = mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost); - std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> bOut = + mgr.tensor({ 0, 0, 0, 0, 0 }); - std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); + std::shared_ptr> lOut = + mgr.tensor({ 0, 0, 0, 0, 0 }); std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index f9e066f47..932661dd4 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -50,8 +50,11 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) kp::Constants pushConstsA({ 2.0 }); kp::Constants pushConstsB({ 3.0 }); - auto algorithm = mgr.algorithm( - params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA); + auto algorithm = mgr.algorithm(params, + kp::Shader::compileSource(shader), + workgroup, + specConsts, + pushConstsA); // 3. Run operation with string shader synchronously mgr.sequence() @@ -202,4 +205,3 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) EXPECT_EQ(tensorA->vector(), std::vector({ 3, 3, 3 })); } - diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index a1f8eda99..bf2ed8587 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -71,9 +71,9 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor) //{ // kp::Manager mgr; // -// std::shared_ptr> tensorA{ new kp::Tensor({ 3, 4, 5 }) }; -// std::shared_ptr> tensorB{ new kp::Tensor({ 0, 0, 0 }) }; -// mgr.rebuild({ tensorA, tensorB }); +// std::shared_ptr> tensorA{ new kp::Tensor({ 3, 4, 5 }) +// }; std::shared_ptr> tensorB{ new kp::Tensor({ 0, 0, 0 +// }) }; mgr.rebuild({ tensorA, tensorB }); // // mgr.evalOpDefault( // { tensorA, tensorB }, diff --git a/test/TestPushConstant.cpp b/test/TestPushConstant.cpp index 9599596ed..66f798afe 100644 --- a/test/TestPushConstant.cpp +++ b/test/TestPushConstant.cpp @@ -29,15 +29,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride) { kp::Manager mgr; - std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = + mgr.tensor({ 0, 0, 0 }); - std::shared_ptr algo = - mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 }); + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 }); sq = mgr.sequence()->eval({ tensor }); // We need to run this in sequence to avoid race condition - // We can't use atomicAdd as swiftshader doesn't support it for float + // We can't use atomicAdd as swiftshader doesn't support it for + // float sq->eval(algo, kp::Constants{ 0.1, 0.2, 0.3 }); sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); @@ -72,15 +74,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride) { kp::Manager mgr; - std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = + mgr.tensor({ 0, 0, 0 }); - std::shared_ptr algo = - mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 }); + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 }); sq = mgr.sequence()->eval({ tensor }); // We need to run this in sequence to avoid race condition - // We can't use atomicAdd as swiftshader doesn't support it for float + // We can't use atomicAdd as swiftshader doesn't support it for + // float sq->eval(algo); sq->eval(algo, kp::Constants{ 0.3, 0.2, 0.1 }); sq->eval({ tensor }); @@ -115,15 +119,17 @@ TEST(TestPushConstants, TestConstantsWrongSize) { kp::Manager mgr; - std::shared_ptr> tensor = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensor = + mgr.tensor({ 0, 0, 0 }); - std::shared_ptr algo = - mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); + std::shared_ptr algo = mgr.algorithm( + { tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 }); - sq = mgr.sequence() - ->record({ tensor }); + sq = mgr.sequence()->record({ tensor }); - EXPECT_THROW(sq->record(algo, kp::Constants{ 0.1, 0.2, 0.3 }), std::runtime_error); + EXPECT_THROW(sq->record( + algo, kp::Constants{ 0.1, 0.2, 0.3 }), + std::runtime_error); } } } diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index ca3b9a485..19d96c893 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -60,9 +60,9 @@ TEST(TestSequence, RerecordSequence) std::shared_ptr sq = mgr.sequence(); - std::shared_ptr> tensorA = mgr.tensor({1, 2, 3}); - std::shared_ptr> tensorB = mgr.tensor({2, 2, 2}); - std::shared_ptr> tensorOut = mgr.tensor({0, 0, 0}); + std::shared_ptr> tensorA = mgr.tensor({ 1, 2, 3 }); + std::shared_ptr> tensorB = mgr.tensor({ 2, 2, 2 }); + std::shared_ptr> tensorOut = mgr.tensor({ 0, 0, 0 }); sq->eval({ tensorA, tensorB, tensorOut }); @@ -83,25 +83,24 @@ TEST(TestSequence, RerecordSequence) )"); std::shared_ptr algo = - mgr.algorithm({tensorA, tensorB, tensorOut}, spirv); + mgr.algorithm({ tensorA, tensorB, tensorOut }, spirv); - sq->record(algo) - ->record({tensorA, tensorB, tensorOut}); + sq->record(algo)->record( + { tensorA, tensorB, tensorOut }); sq->eval(); - EXPECT_EQ(tensorOut->vector(), std::vector({2, 4, 6})); + EXPECT_EQ(tensorOut->vector(), std::vector({ 2, 4, 6 })); - algo->rebuild({tensorOut, tensorA, tensorB}, spirv); + algo->rebuild({ tensorOut, tensorA, tensorB }, spirv); // Refresh and trigger a rerecord sq->rerecord(); sq->eval(); - EXPECT_EQ(tensorB->vector(), std::vector({2, 8, 18})); + EXPECT_EQ(tensorB->vector(), std::vector({ 2, 8, 18 })); } - TEST(TestSequence, SequenceTimestamps) { kp::Manager mgr; @@ -118,15 +117,16 @@ TEST(TestSequence, SequenceTimestamps) })"); std::vector spirv = kp::Shader::compileSource(shader); - - auto seq = mgr.sequence(0, 100); //100 timestamps + + auto seq = mgr.sequence(0, 100); // 100 timestamps seq->record({ tensorA }) - ->record(mgr.algorithm({ tensorA }, spirv)) - ->record(mgr.algorithm({ tensorA }, spirv)) - ->record(mgr.algorithm({ tensorA }, spirv)) - ->record({ tensorA }) - ->eval(); + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record(mgr.algorithm({ tensorA }, spirv)) + ->record({ tensorA }) + ->eval(); const std::vector timestamps = seq->getTimestamps(); - - EXPECT_EQ(timestamps.size(), 6); //1 timestamp at start + 1 after each operation + + EXPECT_EQ(timestamps.size(), + 6); // 1 timestamp at start + 1 after each operation } diff --git a/test/TestShaderResources.cpp b/test/TestShaderResources.cpp index 536f4ca0c..6faddb39e 100644 --- a/test/TestShaderResources.cpp +++ b/test/TestShaderResources.cpp @@ -24,34 +24,43 @@ static const std::string shaderString = (R"( } )"); -void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) { - kp::Shader::compileSource(shaderString, std::string("main"), std::vector>({}), resources); +void +compileShaderWithGivenResources(const std::string shaderString, + const TBuiltInResource resources) +{ + kp::Shader::compileSource( + shaderString, + std::string("main"), + std::vector>({}), + resources); } - - TEST(TestShaderResources, TestNoMaxLight) { TBuiltInResource noMaxLightResources = kp::Shader::defaultResource; - noMaxLightResources.maxLights=0; - - EXPECT_NO_THROW(compileShaderWithGivenResources(shaderString, noMaxLightResources)); -} + noMaxLightResources.maxLights = 0; + EXPECT_NO_THROW( + compileShaderWithGivenResources(shaderString, noMaxLightResources)); +} TEST(TestShaderResources, TestSmallComputeWorkGroupSizeX) { - TBuiltInResource smallComputeWorkGroupSizeXResources = kp::Shader::defaultResource; - smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX=0; - - ASSERT_THROW(compileShaderWithGivenResources(shaderString, smallComputeWorkGroupSizeXResources), std::runtime_error); -} + TBuiltInResource smallComputeWorkGroupSizeXResources = + kp::Shader::defaultResource; + smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX = 0; + ASSERT_THROW(compileShaderWithGivenResources( + shaderString, smallComputeWorkGroupSizeXResources), + std::runtime_error); +} TEST(TestShaderResources, TestNoWhileLoopLimit) { TBuiltInResource noWhileLoopLimitResources = kp::Shader::defaultResource; - noWhileLoopLimitResources.limits.whileLoops=0; - - ASSERT_THROW(compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources), std::runtime_error); -} + noWhileLoopLimitResources.limits.whileLoops = 0; + + ASSERT_THROW( + compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources), + std::runtime_error); +} diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp index fe40fb5ea..7654c8aaf 100644 --- a/test/TestSpecializationConstant.cpp +++ b/test/TestSpecializationConstant.cpp @@ -25,8 +25,10 @@ TEST(TestSpecializationConstants, TestTwoConstants) { kp::Manager mgr; - std::shared_ptr> tensorA = mgr.tensor({ 0, 0, 0 }); - std::shared_ptr> tensorB = mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorA = + mgr.tensor({ 0, 0, 0 }); + std::shared_ptr> tensorB = + mgr.tensor({ 0, 0, 0 }); std::vector> params = { tensorA, tensorB }; diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp index 8836840a6..baa5e14da 100644 --- a/test/TestWorkgroup.cpp +++ b/test/TestWorkgroup.cpp @@ -52,12 +52,13 @@ TEST(TestWorkgroup, TestSimpleWorkgroup) }; std::vector expectedB = { - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, - 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, - 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, + 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, + 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, + 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, + 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, + 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }; EXPECT_EQ(tensorA->vector(), expectedA);