Reformat
This commit is contained in:
parent
1d2d33b269
commit
1d1018fa0c
22 changed files with 326 additions and 259 deletions
|
|
@ -55,7 +55,7 @@ The example below shows how you can enable the "VK_EXT_shader_atomic_float" exte
|
|||
atomicAdd(pa[2], pcs.z);
|
||||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = nullptr;
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ We also provide tools that allow you to `convert shaders into C++ headers <https
|
|||
throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + tensors.size());
|
||||
}
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(R"(
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(R"(
|
||||
#version 450
|
||||
|
||||
layout(set = 0, binding = 0) buffer tensorLhs {
|
||||
|
|
@ -215,7 +215,7 @@ In this case we create a shader that should take a couple of milliseconds to run
|
|||
}
|
||||
)");
|
||||
|
||||
auto algo = mgr.algorithm({tensor}, kp::Shader::compile_source(shader));
|
||||
auto algo = mgr.algorithm({tensor}, kp::Shader::compileSource(shader));
|
||||
|
||||
Now we are able to run the await function on the default sequence.
|
||||
|
||||
|
|
@ -361,7 +361,7 @@ Similar to the asynchronous use case above, we can still run synchronous commands
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm({tensorA, tenssorB}, spirv);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,8 @@ Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
|
|||
"spirv size: {}",
|
||||
tensors.size(),
|
||||
spirv.size());
|
||||
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
this->rebuild(
|
||||
tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
|
||||
"spirv so not rebuilding vulkan components");
|
||||
|
|
@ -425,15 +426,18 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::setPush(const Constants& pushConstants) {
|
||||
Algorithm::setPush(const Constants& pushConstants)
|
||||
{
|
||||
|
||||
if (pushConstants.size() != this->mPushConstants.size()) {
|
||||
throw std::runtime_error(fmt::format("Kompute Algorithm push "
|
||||
"constant provided is size {} but expected size {}",
|
||||
pushConstants.size(), this->mPushConstants.size()));
|
||||
}
|
||||
if (pushConstants.size() != this->mPushConstants.size()) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("Kompute Algorithm push "
|
||||
"constant provided is size {} but expected size {}",
|
||||
pushConstants.size(),
|
||||
this->mPushConstants.size()));
|
||||
}
|
||||
|
||||
this->mPushConstants = pushConstants;
|
||||
this->mPushConstants = pushConstants;
|
||||
}
|
||||
|
||||
const Workgroup&
|
||||
|
|
@ -449,7 +453,8 @@ Algorithm::getSpecializationConstants()
|
|||
}
|
||||
|
||||
const Constants&
|
||||
Algorithm::getPush() {
|
||||
Algorithm::getPush()
|
||||
{
|
||||
return this->mPushConstants;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "kompute/Manager.hpp"
|
||||
|
||||
|
|
@ -39,7 +39,8 @@ Manager::Manager(uint32_t physicalDeviceIndex,
|
|||
this->mManageResources = true;
|
||||
|
||||
this->createInstance();
|
||||
this->createDevice(familyQueueIndices, physicalDeviceIndex, desiredExtensions);
|
||||
this->createDevice(
|
||||
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
|
||||
}
|
||||
|
||||
Manager::Manager(std::shared_ptr<vk::Instance> instance,
|
||||
|
|
@ -177,7 +178,8 @@ Manager::createInstance()
|
|||
};
|
||||
std::vector<std::string> envLayerNames;
|
||||
const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS");
|
||||
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}", envLayerNamesVal);
|
||||
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}",
|
||||
envLayerNamesVal);
|
||||
if (envLayerNamesVal != NULL && *envLayerNamesVal != '\0') {
|
||||
std::istringstream iss(envLayerNamesVal);
|
||||
std::istream_iterator<std::string> beg(iss), end;
|
||||
|
|
@ -206,13 +208,15 @@ Manager::createInstance()
|
|||
}
|
||||
|
||||
if (validLayerNames.size() > 0) {
|
||||
KP_LOG_DEBUG("Kompute Manager Initializing instance with valid layers: {}", validLayerNames);
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Manager Initializing instance with valid layers: {}",
|
||||
validLayerNames);
|
||||
computeInstanceCreateInfo.enabledLayerCount =
|
||||
(uint32_t)validLayerNames.size();
|
||||
computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data();
|
||||
}
|
||||
else {
|
||||
KP_LOG_WARN("Kompute Manager no valid layer names found from desired layer names");
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Manager no valid layer names found from desired "
|
||||
"layer names");
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -347,16 +351,19 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||
deviceQueueCreateInfos.push_back(deviceQueueCreateInfo);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager desired extension layers {}", desiredExtensions);
|
||||
KP_LOG_DEBUG("Kompute Manager desired extension layers {}",
|
||||
desiredExtensions);
|
||||
|
||||
std::vector<vk::ExtensionProperties> deviceExtensions = this->mPhysicalDevice->enumerateDeviceExtensionProperties();
|
||||
std::vector<vk::ExtensionProperties> deviceExtensions =
|
||||
this->mPhysicalDevice->enumerateDeviceExtensionProperties();
|
||||
|
||||
std::set<std::string> uniqueExtensionNames;
|
||||
for (const vk::ExtensionProperties& ext : deviceExtensions) {
|
||||
std::string extName(ext.extensionName.data());
|
||||
uniqueExtensionNames.insert(extName);
|
||||
}
|
||||
KP_LOG_DEBUG("Kompute Manager available extensions {}", uniqueExtensionNames);
|
||||
KP_LOG_DEBUG("Kompute Manager available extensions {}",
|
||||
uniqueExtensionNames);
|
||||
std::vector<const char*> validExtensions;
|
||||
for (std::string ext : desiredExtensions) {
|
||||
if (uniqueExtensionNames.count(ext) != 0) {
|
||||
|
|
@ -364,7 +371,8 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||
}
|
||||
}
|
||||
if (desiredExtensions.size() != validExtensions.size()) {
|
||||
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}", validExtensions);
|
||||
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}",
|
||||
validExtensions);
|
||||
}
|
||||
|
||||
vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
|
||||
|
|
@ -406,7 +414,12 @@ Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
|||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice, tensors, spirv, workgroup, specializationConstants, pushConstants) };
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants,
|
||||
pushConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
|
|
|
|||
|
|
@ -18,13 +18,16 @@ OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
|||
uint32_t size = this->mTensors[0]->size();
|
||||
for (const std::shared_ptr<Tensor>& tensor : tensors) {
|
||||
if (tensor->dataType() != dataType) {
|
||||
throw std::runtime_error(fmt::format("Attempting to copy tensors of different types from {} to {}",
|
||||
dataType, tensor->dataType()));
|
||||
throw std::runtime_error(fmt::format(
|
||||
"Attempting to copy tensors of different types from {} to {}",
|
||||
dataType,
|
||||
tensor->dataType()));
|
||||
}
|
||||
if (tensor->size() != size) {
|
||||
throw std::runtime_error(fmt::format("Attempting to copy tensors of different sizes from {} to {}",
|
||||
size, tensor->size()));
|
||||
|
||||
throw std::runtime_error(fmt::format(
|
||||
"Attempting to copy tensors of different sizes from {} to {}",
|
||||
size,
|
||||
tensor->size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ void
|
|||
OpTensorSyncDevice::preEval(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -18,8 +18,9 @@ Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
|||
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
if(totalTimestamps>0)
|
||||
this->createTimestampQueryPool(totalTimestamps+1); //+1 for the first one
|
||||
if (totalTimestamps > 0)
|
||||
this->createTimestampQueryPool(totalTimestamps +
|
||||
1); //+1 for the first one
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
|
|
@ -48,12 +49,12 @@ Sequence::begin()
|
|||
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
|
||||
this->mRecording = true;
|
||||
|
||||
//latch the first timestamp before any commands are submitted
|
||||
if(this->timestampQueryPool)
|
||||
// latch the first timestamp before any commands are submitted
|
||||
if (this->timestampQueryPool)
|
||||
this->mCommandBuffer->writeTimestamp(
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool, 0
|
||||
);
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool,
|
||||
0);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -246,12 +247,12 @@ Sequence::destroy()
|
|||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
if(this->timestampQueryPool){
|
||||
if (this->timestampQueryPool) {
|
||||
KP_LOG_INFO("Destroying QueryPool");
|
||||
this->mDevice->destroy(
|
||||
*this->timestampQueryPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
*this->timestampQueryPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
this->timestampQueryPool = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool");
|
||||
}
|
||||
|
|
@ -281,12 +282,12 @@ Sequence::record(std::shared_ptr<OpBase> op)
|
|||
|
||||
this->mOperations.push_back(op);
|
||||
|
||||
if(this->timestampQueryPool)
|
||||
this->mCommandBuffer->writeTimestamp(
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool, this->mOperations.size()
|
||||
);
|
||||
|
||||
if (this->timestampQueryPool)
|
||||
this->mCommandBuffer->writeTimestamp(
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool,
|
||||
this->mOperations.size());
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
|
|
@ -339,7 +340,8 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence creating query pool");
|
||||
if (!this->isInit()) {
|
||||
throw std::runtime_error("createTimestampQueryPool() called on uninitialized Sequence");
|
||||
throw std::runtime_error(
|
||||
"createTimestampQueryPool() called on uninitialized Sequence");
|
||||
}
|
||||
if (!this->mPhysicalDevice) {
|
||||
throw std::runtime_error("Kompute Sequence physical device is null");
|
||||
|
|
@ -347,16 +349,16 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
|
|||
|
||||
vk::PhysicalDeviceProperties physicalDeviceProperties =
|
||||
this->mPhysicalDevice->getProperties();
|
||||
|
||||
if(physicalDeviceProperties.limits.timestampComputeAndGraphics){
|
||||
|
||||
if (physicalDeviceProperties.limits.timestampComputeAndGraphics) {
|
||||
vk::QueryPoolCreateInfo queryPoolInfo;
|
||||
queryPoolInfo.setQueryCount(totalTimestamps);
|
||||
queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
|
||||
this->timestampQueryPool = std::make_shared<vk::QueryPool>(this->mDevice->createQueryPool(queryPoolInfo));
|
||||
this->timestampQueryPool = std::make_shared<vk::QueryPool>(
|
||||
this->mDevice->createQueryPool(queryPoolInfo));
|
||||
|
||||
KP_LOG_DEBUG("Query pool for timestamps created");
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
throw std::runtime_error("Device does not support timestamps");
|
||||
}
|
||||
}
|
||||
|
|
@ -364,14 +366,19 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
|
|||
std::vector<std::uint64_t>
|
||||
Sequence::getTimestamps()
|
||||
{
|
||||
if(!this->timestampQueryPool)
|
||||
if (!this->timestampQueryPool)
|
||||
throw std::runtime_error("Timestamp latching not enabled");
|
||||
|
||||
const auto n = this->mOperations.size()+1;
|
||||
|
||||
const auto n = this->mOperations.size() + 1;
|
||||
std::vector<std::uint64_t> timestamps(n, 0);
|
||||
this->mDevice->getQueryPoolResults(*this->timestampQueryPool,
|
||||
0, n, timestamps.size()*sizeof(std::uint64_t), timestamps.data(),
|
||||
sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
|
||||
this->mDevice->getQueryPoolResults(
|
||||
*this->timestampQueryPool,
|
||||
0,
|
||||
n,
|
||||
timestamps.size() * sizeof(std::uint64_t),
|
||||
timestamps.data(),
|
||||
sizeof(uint64_t),
|
||||
vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
|
||||
|
||||
return timestamps;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,10 +99,10 @@ Shader::compileSource(
|
|||
const TBuiltInResource& resource)
|
||||
{
|
||||
return compileSources({ source },
|
||||
std::vector<std::string>({}),
|
||||
entryPoint,
|
||||
definitions,
|
||||
resource);
|
||||
std::vector<std::string>({}),
|
||||
entryPoint,
|
||||
definitions,
|
||||
resource);
|
||||
}
|
||||
|
||||
const TBuiltInResource Shader::defaultResource = {
|
||||
|
|
|
|||
|
|
@ -64,13 +64,10 @@ Tensor::tensorType()
|
|||
bool
|
||||
Tensor::isInit()
|
||||
{
|
||||
return this->mDevice
|
||||
&& this->mPrimaryBuffer
|
||||
&& this->mPrimaryMemory
|
||||
&& this->mRawData;
|
||||
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory &&
|
||||
this->mRawData;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<Tensor> copyFromTensor,
|
||||
|
|
@ -175,7 +172,8 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
|||
vk::DescriptorBufferInfo
|
||||
Tensor::constructDescriptorBufferInfo()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize());
|
||||
KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}",
|
||||
this->memorySize());
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
|
||||
0, // offset
|
||||
|
|
@ -213,7 +211,7 @@ Tensor::getPrimaryMemoryPropertyFlags()
|
|||
break;
|
||||
case TensorTypes::eHost:
|
||||
return vk::MemoryPropertyFlagBits::eHostVisible |
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent;
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent;
|
||||
break;
|
||||
case TensorTypes::eStorage:
|
||||
return vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||
|
|
@ -363,7 +361,8 @@ Tensor::destroy()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor started destroy()");
|
||||
|
||||
// Setting raw data to null regardless whether device is available to invalidate Tensor
|
||||
// Setting raw data to null regardless whether device is available to
|
||||
// invalidate Tensor
|
||||
this->mRawData = nullptr;
|
||||
this->mSize = 0;
|
||||
this->mDataTypeMemorySize = 0;
|
||||
|
|
@ -442,31 +441,36 @@ Tensor::destroy()
|
|||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<bool>::dataType() {
|
||||
TensorT<bool>::dataType()
|
||||
{
|
||||
return Tensor::TensorDataTypes::eBool;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<int32_t>::dataType() {
|
||||
TensorT<int32_t>::dataType()
|
||||
{
|
||||
return Tensor::TensorDataTypes::eInt;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<uint32_t>::dataType() {
|
||||
TensorT<uint32_t>::dataType()
|
||||
{
|
||||
return Tensor::TensorDataTypes::eUnsignedInt;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<float>::dataType() {
|
||||
TensorT<float>::dataType()
|
||||
{
|
||||
return Tensor::TensorDataTypes::eFloat;
|
||||
}
|
||||
|
||||
template<>
|
||||
Tensor::TensorDataTypes
|
||||
TensorT<double>::dataType() {
|
||||
TensorT<double>::dataType()
|
||||
{
|
||||
return Tensor::TensorDataTypes::eDouble;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,15 +18,17 @@ class Algorithm
|
|||
* the underlying resources.
|
||||
*
|
||||
* @param device The Vulkan device to use for creating resources
|
||||
* @param tensors (optional) The tensors to use to create the descriptor resources
|
||||
* @param tensors (optional) The tensors to use to create the descriptor
|
||||
* resources
|
||||
* @param spirv (optional) The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
|
||||
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to initialize
|
||||
* the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when initializing the
|
||||
* pipeline, which set the size of the push constants - these can be modified but
|
||||
* all new values must have the same vector size as this initial value.
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
*/
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
|
|
@ -36,18 +38,19 @@ class Algorithm
|
|||
const Constants& pushConstants = {});
|
||||
|
||||
/**
|
||||
* Rebuild function to reconstruct algorithm with configuration parameters to create
|
||||
* the underlying resources.
|
||||
* Rebuild function to reconstruct algorithm with configuration parameters
|
||||
* to create the underlying resources.
|
||||
*
|
||||
* @param tensors The tensors to use to create the descriptor resources
|
||||
* @param spirv The spirv code to use to create the algorithm
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch which defaults to
|
||||
* kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to initialize
|
||||
* the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when initializing the
|
||||
* pipeline, which set the size of the push constants - these can be modified but
|
||||
* all new values must have the same vector size as this initial value.
|
||||
* @param workgroup (optional) The kp::Workgroup to use for the dispatch
|
||||
* which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
|
||||
* @param specializationConstants (optional) The kp::Constants to use to
|
||||
* initialize the specialization constants which cannot be changed once set.
|
||||
* @param pushConstants (optional) The kp::Constants to use when
|
||||
* initializing the pipeline, which set the size of the push constants -
|
||||
* these can be modified but all new values must have the same vector size
|
||||
* as this initial value.
|
||||
*/
|
||||
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
|
|
@ -70,25 +73,26 @@ class Algorithm
|
|||
void recordDispatch(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
/**
|
||||
* Records command that binds the "core" algorithm components which consist of
|
||||
* binding the pipeline and binding the descriptorsets.
|
||||
* Records command that binds the "core" algorithm components which consist
|
||||
* of binding the pipeline and binding the descriptorsets.
|
||||
*
|
||||
* @param commandBuffer Command buffer to record the algorithm resources to
|
||||
*/
|
||||
void recordBindCore(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
/**
|
||||
* Records command that binds the push constants to the command buffer provided
|
||||
* - it is required that the pushConstants provided are of the same size as the
|
||||
* ones provided during initialization.
|
||||
* Records command that binds the push constants to the command buffer
|
||||
* provided
|
||||
* - it is required that the pushConstants provided are of the same size as
|
||||
* the ones provided during initialization.
|
||||
*
|
||||
* @param commandBuffer Command buffer to record the algorithm resources to
|
||||
*/
|
||||
void recordBindPush(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
/**
|
||||
* function that checks all the gpu resource components to verify if these have
|
||||
* been created and returns true if all are valid.
|
||||
* function that checks all the gpu resource components to verify if these
|
||||
* have been created and returns true if all are valid.
|
||||
*
|
||||
* @returns returns true if the algorithm is currently initialized.
|
||||
*/
|
||||
|
|
@ -97,26 +101,28 @@ class Algorithm
|
|||
/**
|
||||
* Sets the work group to use in the recordDispatch
|
||||
*
|
||||
* @param workgroup The kp::Workgroup value to use to update the algorithm. It
|
||||
* must have a value greater than 1 on the x value (index 1) otherwise it will
|
||||
* be initialized on the size of the first tensor (ie. this->mTensor[0]->size())
|
||||
* @param workgroup The kp::Workgroup value to use to update the algorithm.
|
||||
* It must have a value greater than 1 on the x value (index 1) otherwise it
|
||||
* will be initialized on the size of the first tensor (ie.
|
||||
* this->mTensor[0]->size())
|
||||
*/
|
||||
void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
|
||||
/**
|
||||
* Sets the push constants to the new value provided to use in the next bindPush()
|
||||
* Sets the push constants to the new value provided to use in the next
|
||||
* bindPush()
|
||||
*
|
||||
* @param The kp::Constant to use to set the push constants to use in the next
|
||||
* bindPush(...) calls. The constants provided must be of the same size as the
|
||||
* ones created during initialization.
|
||||
* @param pushConstants The kp::Constants to use to set the push constants to use in the
|
||||
* next bindPush(...) calls. The constants provided must be of the same size
|
||||
* as the ones created during initialization.
|
||||
*/
|
||||
void setPush(const Constants& pushConstants);
|
||||
|
||||
/**
|
||||
* Gets the current workgroup from the algorithm.
|
||||
*
|
||||
* @param The kp::Constant to use to set the push constants to use in the next
|
||||
* bindPush(...) calls. The constants provided must be of the same size as the
|
||||
* ones created during initialization.
|
||||
* @returns The kp::Workgroup that the algorithm is currently configured
|
||||
* with, as set at construction / rebuild or through setWorkgroup().
|
||||
*/
|
||||
const Workgroup& getWorkgroup();
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -24,13 +24,14 @@ class Manager
|
|||
Manager();
|
||||
|
||||
/**
|
||||
* Similar to base constructor but allows for further configuration to use when
|
||||
* creating the Vulkan resources.
|
||||
* Similar to base constructor but allows for further configuration to use
|
||||
* when creating the Vulkan resources.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param familyQueueIndices (Optional) List of queue indices to add for
|
||||
* explicit allocation
|
||||
* @param desiredExtensions The desired extensions to load from physicalDevice
|
||||
* @param desiredExtensions The desired extensions to load from
|
||||
* physicalDevice
|
||||
*/
|
||||
Manager(uint32_t physicalDeviceIndex,
|
||||
const std::vector<uint32_t>& familyQueueIndices = {},
|
||||
|
|
@ -64,7 +65,8 @@ class Manager
|
|||
* If zero (default), disables latching of timestamps.
|
||||
* @returns Shared pointer with initialised sequence
|
||||
*/
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0, uint32_t totalTimestamps = 0);
|
||||
std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
|
||||
uint32_t totalTimestamps = 0);
|
||||
|
||||
/**
|
||||
* Create a managed tensor that will be destroyed by this manager
|
||||
|
|
@ -74,7 +76,7 @@ class Manager
|
|||
* @param tensorType The type of tensor to initialize
|
||||
* @returns Shared pointer with initialised tensor
|
||||
*/
|
||||
template <typename T>
|
||||
template<typename T>
|
||||
std::shared_ptr<TensorT<T>> tensorT(
|
||||
const std::vector<T>& data,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
|
|
@ -105,8 +107,13 @@ class Manager
|
|||
const Tensor::TensorDataTypes& dataType,
|
||||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
|
||||
{
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, elementTotalCount, elementMemorySize, dataType, tensorType) };
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
data,
|
||||
elementTotalCount,
|
||||
elementMemorySize,
|
||||
dataType,
|
||||
tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -40,8 +40,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
|
|||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param op Object derived from kp::BaseOp that will be recoreded by the sequence
|
||||
* which will be used when the operation is evaluated.
|
||||
* @param op Object derived from kp::OpBase that will be recorded by the
|
||||
* sequence which will be used when the operation is evaluated.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
|
@ -59,7 +59,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> record(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
return this->record(op);
|
||||
|
|
@ -94,8 +95,9 @@ class Sequence : public std::enable_shared_from_this<Sequence>
|
|||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Resets all the recorded and stored operations, records the operation
|
||||
* provided and submits into the gpu as a submit job synchronously (with a barrier).
|
||||
* Resets all the recorded and stored operations, records the operation
|
||||
* provided and submits into the gpu as a submit job synchronously (with a
|
||||
* barrier).
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
|
|
@ -138,16 +140,18 @@ class Sequence : public std::enable_shared_from_this<Sequence>
|
|||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job without a barrier. EvalAwait() must
|
||||
* ALWAYS be called after to ensure the sequence is terminated correctly.
|
||||
* operations into the gpu as a submit job without a barrier. EvalAwait()
|
||||
* must ALWAYS be called after to ensure the sequence is terminated
|
||||
* correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
/**
|
||||
* Clears current operations to record provided one in the vector of
|
||||
* operations into the gpu as a submit job without a barrier. EvalAwait() must
|
||||
* ALWAYS be called after to ensure the sequence is terminated correctly.
|
||||
* operations into the gpu as a submit job without a barrier. EvalAwait()
|
||||
* must ALWAYS be called after to ensure the sequence is terminated
|
||||
* correctly.
|
||||
*
|
||||
* @return Boolean stating whether execution was successful.
|
||||
*/
|
||||
|
|
@ -241,9 +245,9 @@ class Sequence : public std::enable_shared_from_this<Sequence>
|
|||
bool isInit();
|
||||
|
||||
/**
|
||||
* Clears command buffer and triggers re-record of all the current operations
|
||||
* saved, which is useful if the underlying kp::Tensors or kp::Algorithms
|
||||
* are modified and need to be re-recorded.
|
||||
* Clears command buffer and triggers re-record of all the current
|
||||
* operations saved, which is useful if the underlying kp::Tensors or
|
||||
* kp::Algorithms are modified and need to be re-recorded.
|
||||
*/
|
||||
void rerecord();
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ namespace kp {
|
|||
class Shader
|
||||
{
|
||||
public:
|
||||
|
||||
// The default resource limit for the GLSL compiler, can be overwritten
|
||||
// Adapted from:
|
||||
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
|
||||
|
|
|
|||
|
|
@ -160,41 +160,33 @@ class Tensor
|
|||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
// TODO: move to cpp
|
||||
uint32_t size() {
|
||||
return this->mSize;
|
||||
}
|
||||
uint32_t size() { return this->mSize; }
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t dataTypeMemorySize() {
|
||||
return this->mDataTypeMemorySize;
|
||||
}
|
||||
uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
|
||||
|
||||
// TODO: move to cpp
|
||||
uint32_t memorySize() {
|
||||
return this->mSize * this->mDataTypeMemorySize;
|
||||
}
|
||||
uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
|
||||
|
||||
/**
|
||||
* Retrieve the underlying data type of the Tensor
|
||||
*
|
||||
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
|
||||
*/
|
||||
TensorDataTypes dataType() {
|
||||
return this->mDataType;
|
||||
}
|
||||
TensorDataTypes dataType() { return this->mDataType; }
|
||||
|
||||
void* rawData() {
|
||||
return this->mRawData;
|
||||
}
|
||||
void* rawData() { return this->mRawData; }
|
||||
|
||||
// TODO: move to cpp
|
||||
template <typename T>
|
||||
T* data() {
|
||||
template<typename T>
|
||||
T* data()
|
||||
{
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> vector() {
|
||||
template<typename T>
|
||||
std::vector<T> vector()
|
||||
{
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
|
|
@ -202,9 +194,9 @@ class Tensor
|
|||
* Sets / resets the vector data of the tensor. This function does not
|
||||
* perform any copies into GPU memory and is only performed on the host.
|
||||
*/
|
||||
void setRawData(const void* data)
|
||||
void setRawData(const void* data)
|
||||
{
|
||||
// Copy data
|
||||
// Copy data
|
||||
memcpy(this->mRawData, data, this->memorySize());
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +209,8 @@ class Tensor
|
|||
void* mRawData;
|
||||
|
||||
private:
|
||||
void mapRawData() {
|
||||
void mapRawData()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
|
|
@ -235,14 +228,17 @@ class Tensor
|
|||
|
||||
vk::DeviceSize bufferSize = this->memorySize();
|
||||
|
||||
// Given we request coherent host memory we don't need to invalidate / flush
|
||||
// Given we request coherent host memory we don't need to invalidate /
|
||||
// flush
|
||||
this->mRawData = this->mDevice->mapMemory(
|
||||
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
|
||||
|
||||
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
|
||||
vk::MappedMemoryRange mappedMemoryRange(
|
||||
*hostVisibleMemory, 0, bufferSize);
|
||||
}
|
||||
|
||||
void unmapRawData() {
|
||||
void unmapRawData()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
|
||||
|
||||
|
|
@ -296,49 +292,46 @@ class Tensor
|
|||
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
|
||||
vk::BufferUsageFlags getStagingBufferUsageFlags();
|
||||
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
|
||||
|
||||
};
|
||||
|
||||
// TODO: Limit T to be only float, bool, double, etc
|
||||
template <typename T>
|
||||
class TensorT: public Tensor
|
||||
template<typename T>
|
||||
class TensorT : public Tensor
|
||||
{
|
||||
|
||||
public:
|
||||
TensorT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<T>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice)
|
||||
: Tensor(physicalDevice,
|
||||
device,
|
||||
(void*)data.data(),
|
||||
data.size(),
|
||||
sizeof(T),
|
||||
this->dataType(),
|
||||
tensorType)
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<T>& data,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice)
|
||||
: Tensor(physicalDevice,
|
||||
device,
|
||||
(void*)data.data(),
|
||||
data.size(),
|
||||
sizeof(T),
|
||||
this->dataType(),
|
||||
tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", data.size());
|
||||
KP_LOG_DEBUG("Kompute TensorT constructor with data size {}",
|
||||
data.size());
|
||||
}
|
||||
|
||||
~TensorT() {
|
||||
KP_LOG_DEBUG("Kompute TensorT destructor");
|
||||
}
|
||||
~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); }
|
||||
|
||||
T* data() {
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
T* data() { return (T*)this->mRawData; }
|
||||
|
||||
std::vector<T> vector() {
|
||||
std::vector<T> vector()
|
||||
{
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
T& operator[](int index) {
|
||||
return *(((T*)this->mRawData) + index);
|
||||
}
|
||||
T& operator[](int index) { return *(((T*)this->mRawData) + index); }
|
||||
|
||||
void setData(const std::vector<T>& data) {
|
||||
void setData(const std::vector<T>& data)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", data.size());
|
||||
KP_LOG_DEBUG("Kompute TensorT setting data with data size {}",
|
||||
data.size());
|
||||
|
||||
if (data.size() != this->mSize) {
|
||||
throw std::runtime_error(
|
||||
|
|
@ -349,7 +342,6 @@ class TensorT: public Tensor
|
|||
}
|
||||
|
||||
TensorDataTypes dataType();
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
{
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = nullptr;
|
||||
|
||||
std::string shader(R"(
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
layout (local_size_x = 1) in;
|
||||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
|
|
|
|||
|
|
@ -20,13 +20,17 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
std::shared_ptr<kp::TensorT<float>> y = mgr.tensor({ 0, 0, 0, 1, 1 });
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> wIn = mgr.tensor({ 0.001, 0.001 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> bIn = mgr.tensor({ 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bOut =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> lOut =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
|
|
@ -95,14 +99,18 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
|
||||
std::shared_ptr<kp::TensorT<float>> wIn =
|
||||
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutI =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> wOutJ =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> bIn =
|
||||
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
|
||||
std::shared_ptr<kp::TensorT<float>> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> bOut =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> lOut =
|
||||
mgr.tensor({ 0, 0, 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
|
||||
wIn, wOutI, wOutJ,
|
||||
|
|
|
|||
|
|
@ -50,8 +50,11 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
|
|||
kp::Constants pushConstsA({ 2.0 });
|
||||
kp::Constants pushConstsB({ 3.0 });
|
||||
|
||||
auto algorithm = mgr.algorithm(
|
||||
params, kp::Shader::compileSource(shader), workgroup, specConsts, pushConstsA);
|
||||
auto algorithm = mgr.algorithm(params,
|
||||
kp::Shader::compileSource(shader),
|
||||
workgroup,
|
||||
specConsts,
|
||||
pushConstsA);
|
||||
|
||||
// 3. Run operation with string shader synchronously
|
||||
mgr.sequence()
|
||||
|
|
@ -202,4 +205,3 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
|
|||
|
||||
EXPECT_EQ(tensorA->vector(), std::vector<float>({ 3, 3, 3 }));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,9 +71,9 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
|
|||
//{
|
||||
// kp::Manager mgr;
|
||||
//
|
||||
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 }) };
|
||||
// std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0 }) };
|
||||
// mgr.rebuild({ tensorA, tensorB });
|
||||
// std::shared_ptr<kp::TensorT<float>> tensorA{ new kp::Tensor({ 3, 4, 5 })
|
||||
// }; std::shared_ptr<kp::TensorT<float>> tensorB{ new kp::Tensor({ 0, 0, 0
|
||||
// }) }; mgr.rebuild({ tensorA, tensorB });
|
||||
//
|
||||
// mgr.evalOpDefault<kp::OpAlgoCreate>(
|
||||
// { tensorA, tensorB },
|
||||
|
|
|
|||
|
|
@ -29,15 +29,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchOverride)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor =
|
||||
mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for float
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 });
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
|
@ -72,15 +74,17 @@ TEST(TestPushConstants, TestConstantsAlgoDispatchNoOverride)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor =
|
||||
mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.1, 0.2, 0.3 });
|
||||
|
||||
sq = mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
// We need to run this in sequence to avoid race condition
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for float
|
||||
// We can't use atomicAdd as swiftshader doesn't support it for
|
||||
// float
|
||||
sq->eval<kp::OpAlgoDispatch>(algo);
|
||||
sq->eval<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.3, 0.2, 0.1 });
|
||||
sq->eval<kp::OpTensorSyncLocal>({ tensor });
|
||||
|
|
@ -115,15 +119,17 @@ TEST(TestPushConstants, TestConstantsWrongSize)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensor =
|
||||
mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
{ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0 });
|
||||
|
||||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ tensor });
|
||||
sq = mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensor });
|
||||
|
||||
EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(algo, kp::Constants{ 0.1, 0.2, 0.3 }), std::runtime_error);
|
||||
EXPECT_THROW(sq->record<kp::OpAlgoDispatch>(
|
||||
algo, kp::Constants{ 0.1, 0.2, 0.3 }),
|
||||
std::runtime_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,9 +60,9 @@ TEST(TestSequence, RerecordSequence)
|
|||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({1, 2, 3});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({2, 2, 2});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({0, 0, 0});
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 1, 2, 3 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 2, 2, 2 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
sq->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB, tensorOut });
|
||||
|
||||
|
|
@ -83,25 +83,24 @@ TEST(TestSequence, RerecordSequence)
|
|||
)");
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({tensorA, tensorB, tensorOut}, spirv);
|
||||
mgr.algorithm({ tensorA, tensorB, tensorOut }, spirv);
|
||||
|
||||
sq->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({tensorA, tensorB, tensorOut});
|
||||
sq->record<kp::OpAlgoDispatch>(algo)->record<kp::OpTensorSyncLocal>(
|
||||
{ tensorA, tensorB, tensorOut });
|
||||
|
||||
sq->eval();
|
||||
|
||||
EXPECT_EQ(tensorOut->vector(), std::vector<float>({2, 4, 6}));
|
||||
EXPECT_EQ(tensorOut->vector(), std::vector<float>({ 2, 4, 6 }));
|
||||
|
||||
algo->rebuild({tensorOut, tensorA, tensorB}, spirv);
|
||||
algo->rebuild({ tensorOut, tensorA, tensorB }, spirv);
|
||||
|
||||
// Refresh and trigger a rerecord
|
||||
sq->rerecord();
|
||||
sq->eval();
|
||||
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({2, 8, 18}));
|
||||
EXPECT_EQ(tensorB->vector(), std::vector<float>({ 2, 8, 18 }));
|
||||
}
|
||||
|
||||
|
||||
TEST(TestSequence, SequenceTimestamps)
|
||||
{
|
||||
kp::Manager mgr;
|
||||
|
|
@ -118,15 +117,16 @@ TEST(TestSequence, SequenceTimestamps)
|
|||
})");
|
||||
|
||||
std::vector<uint32_t> spirv = kp::Shader::compileSource(shader);
|
||||
|
||||
auto seq = mgr.sequence(0, 100); //100 timestamps
|
||||
|
||||
auto seq = mgr.sequence(0, 100); // 100 timestamps
|
||||
seq->record<kp::OpTensorSyncDevice>({ tensorA })
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({ tensorA })
|
||||
->eval();
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({ tensorA })
|
||||
->eval();
|
||||
const std::vector<uint64_t> timestamps = seq->getTimestamps();
|
||||
|
||||
EXPECT_EQ(timestamps.size(), 6); //1 timestamp at start + 1 after each operation
|
||||
|
||||
EXPECT_EQ(timestamps.size(),
|
||||
6); // 1 timestamp at start + 1 after each operation
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,34 +24,43 @@ static const std::string shaderString = (R"(
|
|||
}
|
||||
)");
|
||||
|
||||
void compileShaderWithGivenResources(const std::string shaderString, const TBuiltInResource resources) {
|
||||
kp::Shader::compileSource(shaderString, std::string("main"), std::vector<std::pair<std::string,std::string>>({}), resources);
|
||||
void
|
||||
compileShaderWithGivenResources(const std::string shaderString,
|
||||
const TBuiltInResource resources)
|
||||
{
|
||||
kp::Shader::compileSource(
|
||||
shaderString,
|
||||
std::string("main"),
|
||||
std::vector<std::pair<std::string, std::string>>({}),
|
||||
resources);
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST(TestShaderResources, TestNoMaxLight)
|
||||
{
|
||||
TBuiltInResource noMaxLightResources = kp::Shader::defaultResource;
|
||||
noMaxLightResources.maxLights=0;
|
||||
|
||||
EXPECT_NO_THROW(compileShaderWithGivenResources(shaderString, noMaxLightResources));
|
||||
}
|
||||
noMaxLightResources.maxLights = 0;
|
||||
|
||||
EXPECT_NO_THROW(
|
||||
compileShaderWithGivenResources(shaderString, noMaxLightResources));
|
||||
}
|
||||
|
||||
TEST(TestShaderResources, TestSmallComputeWorkGroupSizeX)
|
||||
{
|
||||
TBuiltInResource smallComputeWorkGroupSizeXResources = kp::Shader::defaultResource;
|
||||
smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX=0;
|
||||
|
||||
ASSERT_THROW(compileShaderWithGivenResources(shaderString, smallComputeWorkGroupSizeXResources), std::runtime_error);
|
||||
}
|
||||
TBuiltInResource smallComputeWorkGroupSizeXResources =
|
||||
kp::Shader::defaultResource;
|
||||
smallComputeWorkGroupSizeXResources.maxComputeWorkGroupSizeX = 0;
|
||||
|
||||
ASSERT_THROW(compileShaderWithGivenResources(
|
||||
shaderString, smallComputeWorkGroupSizeXResources),
|
||||
std::runtime_error);
|
||||
}
|
||||
|
||||
TEST(TestShaderResources, TestNoWhileLoopLimit)
|
||||
{
|
||||
TBuiltInResource noWhileLoopLimitResources = kp::Shader::defaultResource;
|
||||
noWhileLoopLimitResources.limits.whileLoops=0;
|
||||
|
||||
ASSERT_THROW(compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources), std::runtime_error);
|
||||
}
|
||||
noWhileLoopLimitResources.limits.whileLoops = 0;
|
||||
|
||||
ASSERT_THROW(
|
||||
compileShaderWithGivenResources(shaderString, noWhileLoopLimitResources),
|
||||
std::runtime_error);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,8 +25,10 @@ TEST(TestSpecializationConstants, TestTwoConstants)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorA =
|
||||
mgr.tensor({ 0, 0, 0 });
|
||||
std::shared_ptr<kp::TensorT<float>> tensorB =
|
||||
mgr.tensor({ 0, 0, 0 });
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
|
||||
tensorB };
|
||||
|
|
|
|||
|
|
@ -52,12 +52,13 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
|
|||
};
|
||||
|
||||
std::vector<float> expectedB = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
|
||||
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2,
|
||||
3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0,
|
||||
1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
|
||||
7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
|
||||
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
};
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), expectedA);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue