This commit is contained in:
Alejandro Saucedo 2021-03-07 14:45:31 +00:00
parent 1d2d33b269
commit 1d1018fa0c
22 changed files with 326 additions and 259 deletions

View file

@ -55,7 +55,7 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte
atomicAdd(pa[2], pcs.z);
})");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Sequence> sq = nullptr;
@ -102,7 +102,7 @@ We also provide tools that allow you to `convert shaders into C++ headers <https
throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + tensors.size());
}
std::vector<uint32_t> spirv = kp::Shader::compile_source(R"(
std::vector<uint32_t> spirv = kp::Shader::compileSource(R"(
#version 450
layout(set = 0, binding = 0) buffer tensorLhs {
@ -215,7 +215,7 @@ In this case we create a shader that should take a couple of milliseconds to run
}
)");
auto algo = mgr.algorithm({tensor}, kp::Shader::compile_source(shader));
auto algo = mgr.algorithm({tensor}, kp::Shader::compileSource(shader));
Now we are able to run the await function on the default sequence.
@ -361,7 +361,7 @@ Similar to the asyncrhonous usecase above, we can still run synchronous commands
}
)");
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm({tensorA, tenssorB}, spirv);

View file

@ -20,7 +20,8 @@ Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
"spirv size: {}",
tensors.size(),
spirv.size());
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
this->rebuild(
tensors, spirv, workgroup, specializationConstants, pushConstants);
} else {
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
"spirv so not rebuilding vulkan components");
@ -425,15 +426,18 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
}
void
Algorithm::setPush(const Constants& pushConstants) {
Algorithm::setPush(const Constants& pushConstants)
{
if (pushConstants.size() != this->mPushConstants.size()) {
throw std::runtime_error(fmt::format("Kompute Algorithm push "
"constant provided is size {} but expected size {}",
pushConstants.size(), this->mPushConstants.size()));
}
if (pushConstants.size() != this->mPushConstants.size()) {
throw std::runtime_error(
fmt::format("Kompute Algorithm push "
"constant provided is size {} but expected size {}",
pushConstants.size(),
this->mPushConstants.size()));
}
this->mPushConstants = pushConstants;
this->mPushConstants = pushConstants;
}
const Workgroup&
@ -449,7 +453,8 @@ Algorithm::getSpecializationConstants()
}
const Constants&
Algorithm::getPush() {
Algorithm::getPush()
{
return this->mPushConstants;
}

View file

@ -1,8 +1,8 @@
#include <set>
#include <string>
#include <sstream>
#include <iterator>
#include <set>
#include <sstream>
#include <string>
#include "kompute/Manager.hpp"
@ -39,7 +39,8 @@ Manager::Manager(uint32_t physicalDeviceIndex,
this->mManageResources = true;
this->createInstance();
this->createDevice(familyQueueIndices, physicalDeviceIndex, desiredExtensions);
this->createDevice(
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
}
Manager::Manager(std::shared_ptr<vk::Instance> instance,
@ -177,7 +178,8 @@ Manager::createInstance()
};
std::vector<std::string> envLayerNames;
const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS");
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}", envLayerNamesVal);
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}",
envLayerNamesVal);
if (envLayerNamesVal != NULL && *envLayerNamesVal != '\0') {
std::istringstream iss(envLayerNamesVal);
std::istream_iterator<std::string> beg(iss), end;
@ -206,13 +208,15 @@ Manager::createInstance()
}
if (validLayerNames.size() > 0) {
KP_LOG_DEBUG("Kompute Manager Initializing instance with valid layers: {}", validLayerNames);
KP_LOG_DEBUG(
"Kompute Manager Initializing instance with valid layers: {}",
validLayerNames);
computeInstanceCreateInfo.enabledLayerCount =
(uint32_t)validLayerNames.size();
computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data();
}
else {
KP_LOG_WARN("Kompute Manager no valid layer names found from desired layer names");
} else {
KP_LOG_WARN("Kompute Manager no valid layer names found from desired "
"layer names");
}
#endif
#endif
@ -347,16 +351,19 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
deviceQueueCreateInfos.push_back(deviceQueueCreateInfo);
}
KP_LOG_DEBUG("Kompute Manager desired extension layers {}", desiredExtensions);
KP_LOG_DEBUG("Kompute Manager desired extension layers {}",
desiredExtensions);
std::vector<vk::ExtensionProperties> deviceExtensions = this->mPhysicalDevice->enumerateDeviceExtensionProperties();
std::vector<vk::ExtensionProperties> deviceExtensions =
this->mPhysicalDevice->enumerateDeviceExtensionProperties();
std::set<std::string> uniqueExtensionNames;
for (const vk::ExtensionProperties& ext : deviceExtensions) {
std::string extName(ext.extensionName.data());
uniqueExtensionNames.insert(extName);
}
KP_LOG_DEBUG("Kompute Manager available extensions {}", uniqueExtensionNames);
KP_LOG_DEBUG("Kompute Manager available extensions {}",
uniqueExtensionNames);
std::vector<const char*> validExtensions;
for (std::string ext : desiredExtensions) {
if (uniqueExtensionNames.count(ext) != 0) {
@ -364,7 +371,8 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
}
}
if (desiredExtensions.size() != validExtensions.size()) {
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}", validExtensions);
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}",
validExtensions);
}
vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
@ -406,7 +414,12 @@ Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
this->mDevice, tensors, spirv, workgroup, specializationConstants, pushConstants) };
this->mDevice,
tensors,
spirv,
workgroup,
specializationConstants,
pushConstants) };
if (this->mManageResources) {
this->mManagedAlgorithms.push_back(algorithm);

View file

@ -18,13 +18,16 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
uint32_t size = this->mTensors[0]->size();
for (const std::shared_ptr<Tensor>& tensor : tensors) {
if (tensor->dataType() != dataType) {
throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
dataType, tensor->dataType()));
throw std::runtime_error(fmt::format(
"Attempting to copy tensors of different types from {} to {}",
dataType,
tensor->dataType()));
}
if (tensor->size() != size) {
throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}",
size, tensor->size()));
throw std::runtime_error(fmt::format(
"Attempting to copy tensors of different sizes from {} to {}",
size,
tensor->size()));
}
}
}

View file

@ -40,7 +40,6 @@ void
OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
}
void

View file

@ -18,8 +18,9 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
this->createCommandPool();
this->createCommandBuffer();
if(totalTimestamps>0)
this->createTimestampQueryPool(totalTimestamps+1); //+1 for the first one
if (totalTimestamps > 0)
this->createTimestampQueryPool(totalTimestamps +
1); //+1 for the first one
}
Sequence::~Sequence()
@ -48,12 +49,12 @@ Sequence::begin()
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
this->mRecording = true;
//latch the first timestamp before any commands are submitted
if(this->timestampQueryPool)
// latch the first timestamp before any commands are submitted
if (this->timestampQueryPool)
this->mCommandBuffer->writeTimestamp(
vk::PipelineStageFlagBits::eAllCommands,
*this->timestampQueryPool, 0
);
vk::PipelineStageFlagBits::eAllCommands,
*this->timestampQueryPool,
0);
}
void
@ -246,12 +247,12 @@ Sequence::destroy()
this->mOperations.clear();
}
if(this->timestampQueryPool){
if (this->timestampQueryPool) {
KP_LOG_INFO("Destroying QueryPool");
this->mDevice->destroy(
*this->timestampQueryPool,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
*this->timestampQueryPool,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->timestampQueryPool = nullptr;
KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool");
}
@ -281,12 +282,12 @@ Sequence::record(std::shared_ptr<OpBase> op)
this->mOperations.push_back(op);
if(this->timestampQueryPool)
this->mCommandBuffer->writeTimestamp(
vk::PipelineStageFlagBits::eAllCommands,
*this->timestampQueryPool, this->mOperations.size()
);
if (this->timestampQueryPool)
this->mCommandBuffer->writeTimestamp(
vk::PipelineStageFlagBits::eAllCommands,
*this->timestampQueryPool,
this->mOperations.size());
return shared_from_this();
}
@ -339,7 +340,8 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
{
KP_LOG_DEBUG("Kompute Sequence creating query pool");
if (!this->isInit()) {
throw std::runtime_error("createTimestampQueryPool() called on uninitialized Sequence");
throw std::runtime_error(
"createTimestampQueryPool() called on uninitialized Sequence");
}
if (!this->mPhysicalDevice) {
throw std::runtime_error("Kompute Sequence physical device is null");
@ -347,16 +349,16 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
vk::PhysicalDeviceProperties physicalDeviceProperties =
this->mPhysicalDevice->getProperties();
if(physicalDeviceProperties.limits.timestampComputeAndGraphics){
if (physicalDeviceProperties.limits.timestampComputeAndGraphics) {
vk::QueryPoolCreateInfo queryPoolInfo;
queryPoolInfo.setQueryCount(totalTimestamps);
queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
this->timestampQueryPool = std::make_shared<vk::QueryPool>(this->mDevice->createQueryPool(queryPoolInfo));
this->timestampQueryPool = std::make_shared<vk::QueryPool>(
this->mDevice->createQueryPool(queryPoolInfo));
KP_LOG_DEBUG("Query pool for timestamps created");
}
else{
} else {
throw std::runtime_error("Device does not support timestamps");
}
}
@ -364,14 +366,19 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
std::vector<std::uint64_t>
Sequence::getTimestamps()
{
if(!this->timestampQueryPool)
if (!this->timestampQueryPool)
throw std::runtime_error("Timestamp latching not enabled");
const auto n = this->mOperations.size()+1;
const auto n = this->mOperations.size() + 1;
std::vector<std::uint64_t> timestamps(n, 0);
this->mDevice->getQueryPoolResults(*this->timestampQueryPool,
0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(),
sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
this->mDevice->getQueryPoolResults(
*this->timestampQueryPool,
0,
n,
timestamps.size() * sizeof(std::uint64_t),
timestamps.data(),
sizeof(uint64_t),
vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
return timestamps;
}

View file

@ -99,10 +99,10 @@ Shader::compileSource(
const TBuiltInResource& resource)
{
return compileSources({ source },
std::vector<std::string>({}),
entryPoint,
definitions,
resource);
std::vector<std::string>({}),
entryPoint,
definitions,
resource);
}
const TBuiltInResource Shader::defaultResource = {

View file

@ -64,13 +64,10 @@ Tensor::tensorType()
bool
Tensor::isInit()
{
return this->mDevice
&& this->mPrimaryBuffer
&& this->mPrimaryMemory
&& this->mRawData;
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory &&
this->mRawData;
}
void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<Tensor> copyFromTensor,
@ -175,7 +172,8 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}",
this->memorySize());
vk::DeviceSize bufferSize = this->memorySize();
return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
0, // offset
@ -213,7 +211,7 @@ Tensor::getPrimaryMemoryPropertyFlags()
break;
case TensorTypes::eHost:
return vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent;
vk::MemoryPropertyFlagBits::eHostCoherent;
break;
case TensorTypes::eStorage:
return vk::MemoryPropertyFlagBits::eDeviceLocal;
@ -363,7 +361,8 @@ Tensor::destroy()
{
KP_LOG_DEBUG("Kompute Tensor started destroy()");
// Setting raw data to null regardless whether device is available to invalidate Tensor
// Setting raw data to null regardless whether device is available to
// invalidate Tensor
this->mRawData = nullptr;
this->mSize = 0;
this->mDataTypeMemorySize = 0;
@ -442,31 +441,36 @@ Tensor::destroy()
template<>
Tensor::TensorDataTypes
TensorT<bool>::dataType() {
TensorT<bool>::dataType()
{
return Tensor::TensorDataTypes::eBool;
}
template<>
Tensor::TensorDataTypes
TensorT<int32_t>::dataType() {
TensorT<int32_t>::dataType()
{
return Tensor::TensorDataTypes::eInt;
}
template<>
Tensor::TensorDataTypes
TensorT<uint32_t>::dataType() {
TensorT<uint32_t>::dataType()
{
return Tensor::TensorDataTypes::eUnsignedInt;
}
template<>
Tensor::TensorDataTypes
TensorT<float>::dataType() {
TensorT<float>::dataType()
{
return Tensor::TensorDataTypes::eFloat;
}
template<>
Tensor::TensorDataTypes
TensorT<double>::dataType() {
TensorT<double>::dataType()
{
return Tensor::TensorDataTypes::eDouble;
}

View file

@ -18,15 +18,17 @@ class Algorithm
* the underlying resources.
*
* @param device The Vulkan device to use for creating resources
* @param tensors (optional) The tensors to use to create the descriptor resources
* @param tensors (optional) The tensors to use to create the descriptor
* resources
* @param spirv (optional) The spirv code to use to create the algorithm
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to initialize
* the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when initializing the
* pipeline, which set the size of the push constants - these can be modified but
* all new values must have the same vector size as this initial value.
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to
* initialize the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when
* initializing the pipeline, which set the size of the push constants -
* these can be modified but all new values must have the same vector size
* as this initial value.
*/
Algorithm(std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
@ -36,18 +38,19 @@ class Algorithm
const Constants& pushConstants = {});
/**
* Rebuild function to reconstruct algorithm with configuration parameters to create
* the underlying resources.
* Rebuild function to reconstruct algorithm with configuration parameters
* to create the underlying resources.
*
* @param tensors The tensors to use to create the descriptor resources
* @param spirv The spirv code to use to create the algorithm
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to initialize
* the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when initializing the
* pipeline, which set the size of the push constants - these can be modified but
* all new values must have the same vector size as this initial value.
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
* @param specializationConstants (optional) The kp::Constants to use to
* initialize the specialization constants which cannot be changed once set.
* @param pushConstants (optional) The kp::Constants to use when
* initializing the pipeline, which set the size of the push constants -
* these can be modified but all new values must have the same vector size
* as this initial value.
*/
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
@ -70,25 +73,26 @@ class Algorithm
void recordDispatch(const vk::CommandBuffer& commandBuffer);
/**
* Records command that binds the "core" algorithm components which consist of
* binding the pipeline and binding the descriptorsets.
* Records command that binds the "core" algorithm components which consist
* of binding the pipeline and binding the descriptorsets.
*
* @param commandBuffer Command buffer to record the algorithm resources to
*/
void recordBindCore(const vk::CommandBuffer& commandBuffer);
/**
* Records command that binds the push constants to the command buffer provided
* - it is required that the pushConstants provided are of the same size as the
* ones provided during initialization.
* Records command that binds the push constants to the command buffer
* provided
* - it is required that the pushConstants provided are of the same size as
* the ones provided during initialization.
*
* @param commandBuffer Command buffer to record the algorithm resources to
*/
void recordBindPush(const vk::CommandBuffer& commandBuffer);
/**
* function that checks all the gpu resource components to verify if these have
* been created and returns true if all are valid.
* function that checks all the gpu resource components to verify if these
* have been created and returns true if all are valid.
*
* @returns returns true if the algorithm is currently initialized.
*/
@ -97,26 +101,28 @@ class Algorithm
/**
* Sets the work group to use in the recordDispatch
*
* @param workgroup The kp::Workgroup value to use to update the algorithm. It
* must have a value greater than 1 on the x value (index 1) otherwise it will
* be initialized on the size of the first tensor (ie. this->mTensor[0]->size())
* @param workgroup The kp::Workgroup value to use to update the algorithm.
* It must have a value greater than 1 on the x value (index 1) otherwise it
* will be initialized on the size of the first tensor (ie.
* this->mTensor[0]->size())
*/
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
/**
* Sets the push constants to the new value provided to use in the next bindPush()
* Sets the push constants to the new value provided to use in the next
* bindPush()
*
* @param The kp::Constant to use to set the push constants to use in the next
* bindPush(...) calls. The constants provided must be of the same size as the
* ones created during initialization.
* @param pushConstants The kp::Constants to use to set the push constants
* to use in the next bindPush(...) calls. The constants provided must be of
* the same size as the ones created during initialization.
*/
void setPush(const Constants& pushConstants);
/**
* Gets the current workgroup from the algorithm.
*
* @param The kp::Constant to use to set the push constants to use in the next
* bindPush(...) calls. The constants provided must be of the same size as the
* ones created during initialization.
* @returns Const reference to the kp::Workgroup currently set on the
* algorithm.
*/
const Workgroup& getWorkgroup();
/**

View file

@ -24,13 +24,14 @@ class Manager
Manager();
/**
* Similar to base constructor but allows for further configuration to use when
* creating the Vulkan resources.
* Similar to base constructor but allows for further configuration to use
* when creating the Vulkan resources.
*
* @param physicalDeviceIndex The index of the physical device to use
* @param familyQueueIndices (Optional) List of queue indices to add for
* explicit allocation
* @param desiredExtensions The desired extensions to load from physicalDevice
* @param desiredExtensions The desired extensions to load from
* physicalDevice
*/
Manager(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices = {},
@ -64,7 +65,8 @@ class Manager
* If zero (default), disables latching of timestamps.
* @returns Shared pointer with initialised sequence
*/
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
uint32_t totalTimestamps = 0);
/**
* Create a managed tensor that will be destroyed by this manager
@ -74,7 +76,7 @@ class Manager
* @param tensorType The type of tensor to initialize
* @returns Shared pointer with initialised tensor
*/
template <typename T>
template<typename T>
std::shared_ptr<TensorT<T>> tensorT(
const std::vector<T>& data,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
@ -105,8 +107,13 @@ class Manager
const Tensor::TensorDataTypes& dataType,
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
{
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
this->mDevice,
data,
elementTotalCount,
elementMemorySize,
dataType,
tensorType) };
if (this->mManageResources) {
this->mManagedTensors.push_back(tensor);

View file

@ -2,8 +2,8 @@
#include "kompute/Core.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
@ -40,8 +40,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
* function also requires the Sequence to be recording, otherwise it will
* not be able to add the operation.
*
* @param op Object derived from kp::OpBase that will be recorded by the sequence
* which will be used when the operation is evaluated.
* @param op Object derived from kp::OpBase that will be recorded by the
* sequence which will be used when the operation is evaluated.
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
@ -59,7 +59,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> record(
std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->record(op);
@ -94,8 +95,9 @@ class Sequence : public std::enable_shared_from_this<Sequence>
std::shared_ptr<Sequence> eval();
/**
* Resets all the recorded and stored operations, records the operation
* provided and submits into the gpu as a submit job synchronously (with a barrier).
* Resets all the recorded and stored operations, records the operation
* provided and submits into the gpu as a submit job synchronously (with a
* barrier).
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
@ -138,16 +140,18 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval Async sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job without a barrier. EvalAwait() must
* ALWAYS be called after to ensure the sequence is terminated correctly.
* operations into the gpu as a submit job without a barrier. EvalAwait()
* must ALWAYS be called after to ensure the sequence is terminated
* correctly.
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
std::shared_ptr<Sequence> evalAsync();
/**
* Clears current operations to record provided one in the vector of
* operations into the gpu as a submit job without a barrier. EvalAwait() must
* ALWAYS be called after to ensure the sequence is terminated correctly.
* operations into the gpu as a submit job without a barrier. EvalAwait()
* must ALWAYS be called after to ensure the sequence is terminated
* correctly.
*
* @return Boolean stating whether execution was successful.
*/
@ -241,9 +245,9 @@ class Sequence : public std::enable_shared_from_this<Sequence>
bool isInit();
/**
* Clears command buffer and triggers re-record of all the current operations
* saved, which is useful if the underlying kp::Tensors or kp::Algorithms
* are modified and need to be re-recorded.
* Clears command buffer and triggers re-record of all the current
* operations saved, which is useful if the underlying kp::Tensors or
* kp::Algorithms are modified and need to be re-recorded.
*/
void rerecord();

View file

@ -18,7 +18,6 @@ namespace kp {
class Shader
{
public:
// The default resource limit for the GLSL compiler, can be overwritten
// Has been adopted by:
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp

View file

@ -160,41 +160,33 @@ class Tensor
* @return Unsigned integer representing the total number of elements
*/
// TODO: move to cpp
uint32_t size() {
return this->mSize;
}
uint32_t size() { return this->mSize; }
// TODO: move to cpp
uint32_t dataTypeMemorySize() {
return this->mDataTypeMemorySize;
}
uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
// TODO: move to cpp
uint32_t memorySize() {
return this->mSize * this->mDataTypeMemorySize;
}
uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
/**
* Retrieve the underlying data type of the Tensor
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
TensorDataTypes dataType() {
return this->mDataType;
}
TensorDataTypes dataType() { return this->mDataType; }
void* rawData() {
return this->mRawData;
}
void* rawData() { return this->mRawData; }
// TODO: move to cpp
template <typename T>
T* data() {
template<typename T>
T* data()
{
return (T*)this->mRawData;
}
template <typename T>
std::vector<T> vector() {
template<typename T>
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
@ -202,9 +194,9 @@ class Tensor
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setRawData(const void* data)
void setRawData(const void* data)
{
// Copy data
// Copy data
memcpy(this->mRawData, data, this->memorySize());
}
@ -217,7 +209,8 @@ class Tensor
void* mRawData;
private:
void mapRawData() {
void mapRawData()
{
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
@ -235,14 +228,17 @@ class Tensor
vk::DeviceSize bufferSize = this->memorySize();
// Given we request coherent host memory we don't need to invalidate / flush
// Given we request coherent host memory we don't need to invalidate /
// flush
this->mRawData = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
vk::MappedMemoryRange mappedMemoryRange(
*hostVisibleMemory, 0, bufferSize);
}
void unmapRawData() {
void unmapRawData()
{
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
@ -296,49 +292,46 @@ class Tensor
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
};
// TODO: Limit T to be only float, bool, double, etc
template <typename T>
class TensorT: public Tensor
template<typename T>
class TensorT : public Tensor
{
public:
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<T>& data,
const TensorTypes& tensorType = TensorTypes::eDevice)
: Tensor(physicalDevice,
device,
(void*)data.data(),
data.size(),
sizeof(T),
this->dataType(),
tensorType)
std::shared_ptr<vk::Device> device,
const std::vector<T>& data,
const TensorTypes& tensorType = TensorTypes::eDevice)
: Tensor(physicalDevice,
device,
(void*)data.data(),
data.size(),
sizeof(T),
this->dataType(),
tensorType)
{
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}",
data.size());
}
~TensorT() {
KP_LOG_DEBUG("Kompute TensorT destructor");
}
~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); }
T* data() {
return (T*)this->mRawData;
}
T* data() { return (T*)this->mRawData; }
std::vector<T> vector() {
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}
T& operator[](int index) {
return *(((T*)this->mRawData) + index);
}
T& operator[](int index) { return *(((T*)this->mRawData) + index); }
void setData(const std::vector<T>& data) {
void setData(const std::vector<T>& data)
{
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}",
data.size());
if (data.size() != this->mSize) {
throw std::runtime_error(
@ -349,7 +342,6 @@ class TensorT: public Tensor
}
TensorDataTypes dataType();
};
} // End namespace kp

View file

@ -7,7 +7,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
{
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
std::string shader(R"(
std::string shader(R"(
#version 450
layout (local_size_x = 1) in;
layout(set = 0, binding = 0) buffer a { float pa[]; };

View file

@ -20,13 +20,17 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::TensorT<float>> wIn = mgr.tensor({ 0.001, 0.001 });
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutI =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bIn = mgr.tensor({ 0 });
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bOut =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
@ -95,14 +99,18 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
std::shared_ptr<kp::TensorT<float>> wIn =
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutI =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> wOutJ =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bIn =
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> bOut =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> lOut =
mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,

View file

@ -50,8 +50,11 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
kp::Constants pushConstsA({ 2.0 });
kp::Constants pushConstsB({ 3.0 });
auto algorithm = mgr.algorithm(
params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA);
auto algorithm = mgr.algorithm(params,
kp::Shader::compileSource(shader),
workgroup,
specConsts,
pushConstsA);
// 3. Run operation with string shader synchronously
mgr.sequence()
@ -202,4 +205,3 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
}

View file

@ -71,9 +71,9 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
//{
// kp::Manager mgr;
//
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
// std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
// mgr.rebuild({ tensorA, tensorB });
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 })
// }; std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0
// }) }; mgr.rebuild({ tensorA, tensorB });
//
// mgr.evalOpDefault<kp::OpAlgoCreate>(
// { tensorA, tensorB },

View file

@ -29,15 +29,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
{
kp::Manager mgr;
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor =
mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
// We need to run this in sequence to avoid race condition
// We can't use atomicAdd as swiftshader doesn't support it for float
// We can't use atomicAdd as swiftshader doesn't support it for
// float
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 });
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
sq->eval<kp::OpTensorSyncLocal>({ tensor });
@ -72,15 +74,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
{
kp::Manager mgr;
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor =
mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
// We need to run this in sequence to avoid race condition
// We can't use atomicAdd as swiftshader doesn't support it for float
// We can't use atomicAdd as swiftshader doesn't support it for
// float
sq->eval<kp::OpAlgoDispatch>(algo);
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
sq->eval<kp::OpTensorSyncLocal>({ tensor });
@ -115,15 +119,17 @@ TEST(TestPushConstants, TestConstantsWrongSize)
{
kp::Manager mgr;
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensor =
mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
sq = mgr.sequence()
->record<kp::OpTensorSyncDevice>({ tensor });
sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 }), std::runtime_error);
EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
algo, kp::Constants{ 0.1, 0.2, 0.3 }),
std::runtime_error);
}
}
}

View file

@ -60,9 +60,9 @@ TEST(TestSequence, RerecordSequence)
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({1, 2, 3});
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({2, 2, 2});
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({0, 0, 0});
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 1, 2, 3 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 2, 2, 2 });
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({ 0, 0, 0 });
sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
@ -83,25 +83,24 @@ TEST(TestSequence, RerecordSequence)
)");
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({tensorA, tensorB, tensorOut}, spirv);
mgr.algorithm({ tensorA, tensorB, tensorOut }, spirv);
sq->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>({tensorA, tensorB, tensorOut});
sq->record<kp::OpAlgoDispatch>(algo)->record<kp::OpTensorSyncLocal>(
{ tensorA, tensorB, tensorOut });
sq->eval();
EXPECT_EQ(tensorOut->vector(), std::vector<float>({2, 4, 6}));
EXPECT_EQ(tensorOut->vector(), std::vector<float>({ 2, 4, 6 }));
algo->rebuild({tensorOut, tensorA, tensorB}, spirv);
algo->rebuild({ tensorOut, tensorA, tensorB }, spirv);
// Refresh and trigger a rerecord
sq->rerecord();
sq->eval();
EXPECT_EQ(tensorB->vector(), std::vector<float>({2, 8, 18}));
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 2, 8, 18 }));
}
TEST(TestSequence, SequenceTimestamps)
{
kp::Manager mgr;
@ -118,15 +117,16 @@ TEST(TestSequence, SequenceTimestamps)
})");
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
auto seq = mgr.sequence(0, 100); //100 timestamps
auto seq = mgr.sequence(0, 100); // 100 timestamps
seq->record<kp::OpTensorSyncDevice>({ tensorA })
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
const std::vector<uint64_t> timestamps = seq->getTimestamps();
EXPECT_EQ(timestamps.size(), 6); //1 timestamp at start + 1 after each operation
EXPECT_EQ(timestamps.size(),
6); // 1 timestamp at start + 1 after each operation
}

View file

@ -24,34 +24,43 @@ static const std::string shaderString = (R"(
}
)");
void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) {
kp::Shader::compileSource(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
// Test helper: compiles `shaderString` via kp::Shader::compileSource using the
// entry point name "main", an empty vector of string pairs, and the supplied
// `resources` limits. The value returned by compileSource is discarded; the
// tests in this file only wrap this call in EXPECT_NO_THROW / ASSERT_THROW to
// check whether compilation throws for a given resource configuration.
// NOTE(review): the empty pair vector is presumably the list of preprocessor
// definitions passed to the compiler - confirm against kp::Shader.
void
compileShaderWithGivenResources(const std::string shaderString,
const TBuiltInResource resources)
{
kp::Shader::compileSource(
shaderString,
std::string("main"),
std::vector<std::pair<std::string, std::string>>({}),
resources);
}
TEST(TestShaderResources, TestNoMaxLight)
{
TBuiltInResource noMaxLightResources = kp::Shader::defaultResource;
noMaxLightResources.maxLights=0;
EXPECT_NO_THROW(compileShaderWithGivenResources(shaderString, noMaxLightResources));
}
noMaxLightResources.maxLights = 0;
EXPECT_NO_THROW(
compileShaderWithGivenResources(shaderString, noMaxLightResources));
}
TEST(TestShaderResources, TestSmallComputeWorkGroupSizeX)
{
TBuiltInResource smallComputeWorkGroupSizeXResources = kp::Shader::defaultResource;
smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX=0;
ASSERT_THROW(compileShaderWithGivenResources(shaderString, smallComputeWorkGroupSizeXResources), std::runtime_error);
}
TBuiltInResource smallComputeWorkGroupSizeXResources =
kp::Shader::defaultResource;
smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX = 0;
ASSERT_THROW(compileShaderWithGivenResources(
shaderString, smallComputeWorkGroupSizeXResources),
std::runtime_error);
}
TEST(TestShaderResources, TestNoWhileLoopLimit)
{
TBuiltInResource noWhileLoopLimitResources = kp::Shader::defaultResource;
noWhileLoopLimitResources.limits.whileLoops=0;
ASSERT_THROW(compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources), std::runtime_error);
}
noWhileLoopLimitResources.limits.whileLoops = 0;
ASSERT_THROW(
compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources),
std::runtime_error);
}

View file

@ -25,8 +25,10 @@ TEST(TestSpecializationConstants, TestTwoConstants)
{
kp::Manager mgr;
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorA =
mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorB =
mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
tensorB };

View file

@ -52,12 +52,13 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
};
std::vector<float> expectedB = {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2,
3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0,
1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
};
EXPECT_EQ(tensorA->vector(), expectedA);