This commit is contained in:
Alejandro Saucedo 2021-02-28 16:02:37 +00:00
parent 75315db943
commit 63e220a8a4
26 changed files with 667 additions and 624 deletions

View file

@ -4,23 +4,25 @@
namespace kp {
/**
 * Constructs an Algorithm for the given device.
 *
 * The device pointer is always stored. The vulkan components are only built
 * (through rebuild()) when both a non-empty tensor list and non-empty spirv
 * are supplied; otherwise the constructor just logs and returns.
 *
 * @param device The device stored in mDevice
 * @param tensors The tensors forwarded to rebuild()
 * @param spirv The spirv words forwarded to rebuild()
 * @param workgroup The workgroup forwarded to rebuild()
 * @param specializationConstants The constants forwarded to rebuild()
 */
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
                     const std::vector<std::shared_ptr<Tensor>>& tensors,
                     const std::vector<uint32_t>& spirv,
                     const Workgroup& workgroup,
                     const Constants& specializationConstants)
{
    KP_LOG_DEBUG("Kompute Algorithm Constructor with device");

    this->mDevice = device;

    if (!tensors.empty() && !spirv.empty()) {
        KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
                    "spirv size: {}",
                    tensors.size(),
                    spirv.size());
        this->rebuild(tensors, spirv, workgroup, specializationConstants);
    } else {
        KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
                    "spirv so not rebuilding vulkan components");
    }
}
@ -32,20 +34,21 @@ Algorithm::~Algorithm()
}
void
Algorithm::rebuild(
const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
const Constants& specializationConstants)
Algorithm::rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
const Constants& specializationConstants)
{
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
this->mTensors = tensors;
this->mSpirv = spirv;
this->mSpecializationConstants = specializationConstants;
this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
this->setWorkgroup(workgroup,
this->mTensors.size() ? this->mTensors[0]->size() : 1);
// Descriptor pool is created first so if available then destroy all before rebuild
// Descriptor pool is created first so if available then destroy all before
// rebuild
if (this->isInit()) {
this->destroy();
}
@ -56,22 +59,20 @@ Algorithm::rebuild(
}
/**
 * Reports whether every vulkan component held by the algorithm is set.
 *
 * @return true only when the pipeline, pipeline cache, pipeline layout,
 * descriptor pool, descriptor set, descriptor set layout and shader module
 * members all evaluate to true
 */
bool
Algorithm::isInit()
{
    return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
           this->mDescriptorPool && this->mDescriptorSet &&
           this->mDescriptorSetLayout && this->mShaderModule;
}
void
Algorithm::destroy() {
Algorithm::destroy()
{
if (!this->mDevice) {
KP_LOG_WARN(
"Kompute Algorithm destroy function reached with null Device pointer");
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
"Device pointer");
return;
}
@ -79,7 +80,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
if (!this->mPipeline) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline but it is null");
"pipeline but it is null");
}
this->mDevice->destroy(
*this->mPipeline,
@ -91,7 +92,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
if (!this->mPipelineCache) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline cache but it is null");
"pipeline cache but it is null");
}
this->mDevice->destroy(
*this->mPipelineCache,
@ -103,7 +104,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
if (!this->mPipelineLayout) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline layout but it is null");
"pipeline layout but it is null");
}
this->mDevice->destroy(
*this->mPipelineLayout,
@ -115,7 +116,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
if (!this->mShaderModule) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
"module but it is null");
"module but it is null");
}
this->mDevice->destroy(
*this->mShaderModule,
@ -123,10 +124,10 @@ Algorithm::destroy() {
this->mShaderModule = nullptr;
}
// We don't call freeDescriptorSet because the descriptor pool is not created
// with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; more at
// https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VUID-vkFreeDescriptorSets-descriptorPool-00312
//if (this->mFreeDescriptorSet && this->mDescriptorSet) {
// if (this->mFreeDescriptorSet && this->mDescriptorSet) {
// KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
// if (!this->mDescriptorSet) {
// KP_LOG_WARN(
@ -141,7 +142,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
if (!this->mDescriptorSetLayout) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor set layout but it is null");
"descriptor set layout but it is null");
}
this->mDevice->destroy(
*this->mDescriptorSetLayout,
@ -153,7 +154,7 @@ Algorithm::destroy() {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
if (!this->mDescriptorPool) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor pool but it is null");
"descriptor pool but it is null");
}
this->mDevice->destroy(
*this->mDescriptorPool,
@ -246,10 +247,10 @@ Algorithm::createShaderModule()
{
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
vk::ShaderModuleCreateInfo shaderModuleInfo(
vk::ShaderModuleCreateFlags(),
sizeof(uint32_t) * this->mSpirv.size(),
this->mSpirv.data());
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
sizeof(uint32_t) *
this->mSpirv.size(),
this->mSpirv.data());
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
this->mSpirv.size());
@ -281,14 +282,14 @@ Algorithm::createPipeline()
for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) {
vk::SpecializationMapEntry specializationEntry(
static_cast<uint32_t>(i),
static_cast<uint32_t>(sizeof(float) * i),
sizeof(float));
static_cast<uint32_t>(i),
static_cast<uint32_t>(sizeof(float) * i),
sizeof(float));
specializationEntries.push_back(specializationEntry);
}
// This passes ownership of the memory so we remove ownership from
// This passes ownership of the memory so we remove ownership from
// specialization container by using "transferDataOwnership"
vk::SpecializationInfo specializationInfo(
static_cast<uint32_t>(specializationEntries.size()),
@ -338,7 +339,8 @@ Algorithm::createPipeline()
// TODO: Update to consistent
// this->mPipeline = std::make_shared<vk::Pipeline>();
// this->mDevice->createComputePipelines(
// *this->mPipelineCache, 1, &pipelineInfo, nullptr, this->mPipeline.get());
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
// this->mPipeline.get());
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
}
@ -349,29 +351,31 @@ Algorithm::bindCore(const vk::CommandBuffer& commandBuffer)
KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
*this->mPipeline);
*this->mPipeline);
KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
*this->mPipelineLayout,
0, // First set
*this->mDescriptorSet,
nullptr // Dispatcher
*this->mPipelineLayout,
0, // First set
*this->mDescriptorSet,
nullptr // Dispatcher
);
}
/**
 * Records the push constants into the command buffer for the compute stage.
 * Nothing is recorded when pushConstants is empty.
 *
 * @param commandBuffer The command buffer the push constants are recorded to
 * @param pushConstants The values to push, written starting at offset 0
 */
void
Algorithm::bindPush(const vk::CommandBuffer& commandBuffer,
                    const Constants& pushConstants)
{
    if (!pushConstants.size()) {
        return;
    }

    KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
                 pushConstants.size());

    // The byte size is a size_t expression; make the conversion to the
    // 32-bit size argument explicit rather than relying on implicit
    // narrowing.
    commandBuffer.pushConstants(
      *this->mPipelineLayout,
      vk::ShaderStageFlagBits::eCompute,
      0,
      static_cast<uint32_t>(pushConstants.size() * sizeof(float)),
      pushConstants.data());
}
@ -380,11 +384,13 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
commandBuffer.dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
commandBuffer.dispatch(
this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
}
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
{
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
@ -393,11 +399,9 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
if (workgroup[0] > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mWorkgroup = {
workgroup[0],
workgroup[1] > 0 ? workgroup[1] : 1,
workgroup[2] > 0 ? workgroup[2] : 1
};
this->mWorkgroup = { workgroup[0],
workgroup[1] > 0 ? workgroup[1] : 1,
workgroup[2] > 0 ? workgroup[2] : 1 };
} else {
this->mWorkgroup = { minSize, 1, 1 };
}
@ -409,17 +413,20 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
}
/**
 * Accessor for the workgroup currently stored in the algorithm.
 *
 * @return Const reference to mWorkgroup
 */
const Workgroup&
Algorithm::getWorkgroup()
{
    return this->mWorkgroup;
}
/**
 * Accessor for the specialization constants stored in the algorithm.
 *
 * @return Const reference to mSpecializationConstants
 */
const Constants&
Algorithm::getSpecializationConstants()
{
    return this->mSpecializationConstants;
}
const std::vector<std::shared_ptr<Tensor>>&
Algorithm::getTensors() {
Algorithm::getTensors()
{
return this->mTensors;
}

View file

@ -55,7 +55,8 @@ Manager::~Manager()
}
void
Manager::destroy() {
Manager::destroy()
{
KP_LOG_DEBUG("Kompute Manager destroy() started");
@ -78,7 +79,8 @@ Manager::destroy() {
if (this->mManageResources && this->mManagedAlgorithms.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
for (const std::weak_ptr<Algorithm>& weakAlgorithm : this->mManagedAlgorithms) {
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
this->mManagedAlgorithms) {
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
algorithm->destroy();
}
@ -214,31 +216,31 @@ Manager::createInstance()
}
void
Manager::clear() {
Manager::clear()
{
if (this->mManageResources) {
this->mManagedTensors.erase(
std::remove_if(
begin(this->mManagedTensors),
end(this->mManagedTensors),
[](std::weak_ptr<Tensor> t) {return t.expired();}),
end(this->mManagedTensors));
std::remove_if(begin(this->mManagedTensors),
end(this->mManagedTensors),
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
end(this->mManagedTensors));
this->mManagedAlgorithms.erase(
std::remove_if(
begin(this->mManagedAlgorithms),
end(this->mManagedAlgorithms),
[](std::weak_ptr<Algorithm> t) {return t.expired();}),
end(this->mManagedAlgorithms));
std::remove_if(
begin(this->mManagedAlgorithms),
end(this->mManagedAlgorithms),
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
end(this->mManagedAlgorithms));
this->mManagedSequences.erase(
std::remove_if(
begin(this->mManagedSequences),
end(this->mManagedSequences),
[](std::weak_ptr<Sequence> t) {return t.expired();}),
end(this->mManagedSequences));
std::remove_if(begin(this->mManagedSequences),
end(this->mManagedSequences),
[](std::weak_ptr<Sequence> t) { return t.expired(); }),
end(this->mManagedSequences));
}
}
void
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t physicalDeviceIndex)
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
uint32_t physicalDeviceIndex)
{
KP_LOG_DEBUG("Kompute Manager creating Device");
@ -256,8 +258,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t
std::vector<vk::PhysicalDevice> physicalDevices =
this->mInstance->enumeratePhysicalDevices();
vk::PhysicalDevice physicalDevice =
physicalDevices[physicalDeviceIndex];
vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];
this->mPhysicalDevice =
std::make_shared<vk::PhysicalDevice>(physicalDevice);
@ -342,16 +343,14 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t
}
std::shared_ptr<Tensor>
Manager::tensor(
const std::vector<float>& data,
Tensor::TensorTypes tensorType)
Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
{
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
std::shared_ptr<Tensor> tensor{
new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) };
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
this->mPhysicalDevice, this->mDevice, data, tensorType) };
if (this->mManageResources) {
if (this->mManageResources) {
this->mManagedTensors.push_back(tensor);
}
@ -359,23 +358,18 @@ Manager::tensor(
}
std::shared_ptr<Algorithm>
Manager::algorithm(
const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
const Constants& specializationConstants) {
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<uint32_t>& spirv,
const Workgroup& workgroup,
const Constants& specializationConstants)
{
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
std::shared_ptr<Algorithm> algorithm{
new kp::Algorithm(
this->mDevice,
tensors,
spirv,
workgroup,
specializationConstants)};
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
this->mDevice, tensors, spirv, workgroup, specializationConstants) };
if (this->mManageResources) {
if (this->mManageResources) {
this->mManagedAlgorithms.push_back(algorithm);
}
@ -385,16 +379,15 @@ Manager::algorithm(
std::shared_ptr<Sequence>
Manager::sequence(uint32_t queueIndex)
{
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}",
queueIndex);
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
std::shared_ptr<Sequence> sq{
new kp::Sequence(this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex]) };
std::shared_ptr<Sequence> sq{ new kp::Sequence(
this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex]) };
if (this->mManageResources) {
if (this->mManageResources) {
this->mManagedSequences.push_back(sq);
}

View file

@ -5,7 +5,7 @@
namespace kp {
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
const kp::Constants& pushConstants)
const kp::Constants& pushConstants)
{
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
@ -24,7 +24,8 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
// Barrier to ensure the data is finished writing to buffer memory
for (const std::shared_ptr<Tensor>& tensor : this->mAlgorithm->getTensors()) {
for (const std::shared_ptr<Tensor>& tensor :
this->mAlgorithm->getTensors()) {
tensor->recordBufferMemoryBarrier(
commandBuffer,
vk::AccessFlagBits::eHostWrite,

View file

@ -30,8 +30,8 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromStagingToDevice(
commandBuffer, false);
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer,
false);
}
}
}

View file

@ -30,8 +30,8 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromDeviceToStaging(
commandBuffer, true);
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer,
true);
}
}
}

View file

@ -37,7 +37,8 @@ Sequence::begin()
}
if (this->isRunning()) {
throw std::runtime_error("Kompute Sequence begin called when sequence still running");
throw std::runtime_error(
"Kompute Sequence begin called when sequence still running");
}
KP_LOG_INFO("Kompute Sequence command now started recording");
@ -53,8 +54,7 @@ Sequence::end()
if (!this->isRecording()) {
KP_LOG_WARN("Kompute Sequence end called when not recording");
return;
}
else {
} else {
KP_LOG_INFO("Kompute Sequence command recording END");
this->mCommandBuffer->end();
this->mRecording = false;
@ -62,7 +62,8 @@ Sequence::end()
}
void
Sequence::clear() {
Sequence::clear()
{
KP_LOG_DEBUG("Kompute Sequence calling clear");
this->end();
}
@ -76,7 +77,8 @@ Sequence::eval()
}
std::shared_ptr<Sequence>
Sequence::eval(std::shared_ptr<OpBase> op) {
Sequence::eval(std::shared_ptr<OpBase> op)
{
this->clear();
return this->record(op)->eval();
}
@ -89,8 +91,9 @@ Sequence::evalAsync()
}
if (this->mIsRunning) {
throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was "
"called without successful wait");
throw std::runtime_error(
"Kompute Sequence evalAsync called when an eval async was "
"called without successful wait");
}
this->mIsRunning = true;
@ -137,7 +140,8 @@ Sequence::evalAwait(uint64_t waitFor)
this->mIsRunning = false;
if (result == vk::Result::eTimeout) {
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", waitFor);
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}",
waitFor);
return shared_from_this();
}
@ -161,11 +165,10 @@ Sequence::isRecording()
}
/**
 * Reports whether the sequence holds all of its vulkan handles.
 *
 * @return true only when the device, command pool, command buffer and
 * compute queue members all evaluate to true
 */
bool
Sequence::isInit()
{
    return this->mDevice && this->mCommandPool && this->mCommandBuffer &&
           this->mComputeQueue;
}
void
@ -175,16 +178,15 @@ Sequence::destroy()
if (!this->mDevice) {
KP_LOG_WARN("Kompute Sequence destroy called "
"with null Device pointer");
"with null Device pointer");
return;
}
if (this->mFreeCommandBuffer) {
KP_LOG_INFO("Freeing CommandBuffer");
if (!this->mCommandBuffer) {
KP_LOG_WARN(
"Kompute Sequence destroy called with null "
"CommandPool pointer");
KP_LOG_WARN("Kompute Sequence destroy called with null "
"CommandPool pointer");
return;
}
this->mDevice->freeCommandBuffers(
@ -199,9 +201,8 @@ Sequence::destroy()
if (this->mFreeCommandPool) {
KP_LOG_INFO("Destroying CommandPool");
if (this->mCommandPool == nullptr) {
KP_LOG_WARN(
"Kompute Sequence destroy called with null "
"CommandPool pointer");
KP_LOG_WARN("Kompute Sequence destroy called with null "
"CommandPool pointer");
return;
}
this->mDevice->destroy(
@ -228,7 +229,6 @@ Sequence::destroy()
if (this->mComputeQueue) {
this->mComputeQueue = nullptr;
}
}
std::shared_ptr<Sequence>

View file

@ -5,11 +5,13 @@
namespace kp {
std::vector<uint32_t>
Shader::compile_sources(const std::vector<std::string>& sources,
const std::vector<std::string>& files,
const std::string& entryPoint,
std::vector<std::pair<std::string,std::string>> definitions,
const TBuiltInResource& resources) {
Shader::compile_sources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files,
const std::string& entryPoint,
std::vector<std::pair<std::string, std::string>> definitions,
const TBuiltInResource& resources)
{
// Initialize glslang library.
glslang::InitializeProcess();
@ -18,27 +20,32 @@ Shader::compile_sources(const std::vector<std::string>& sources,
const EShLanguage language = EShLangCompute;
glslang::TShader shader(language);
std::vector<const char*> filesCStr(files.size()), sourcesCStr(sources.size());
for (size_t i = 0; i < sources.size(); i++) sourcesCStr[i] = sources[i].c_str();
std::vector<const char*> filesCStr(files.size()),
sourcesCStr(sources.size());
for (size_t i = 0; i < sources.size(); i++)
sourcesCStr[i] = sources[i].c_str();
if (files.size() > 1) {
assert(files.size() == sources.size());
for (size_t i = 0; i < files.size(); i++) filesCStr[i] = files[i].c_str();
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
}
else {
filesCStr = {""};
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
for (size_t i = 0; i < files.size(); i++)
filesCStr[i] = files[i].c_str();
shader.setStringsWithLengthsAndNames(
sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
} else {
filesCStr = { "" };
shader.setStringsWithLengthsAndNames(
sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
}
shader.setEntryPoint(entryPoint.c_str());
shader.setSourceEntryPoint(entryPoint.c_str());
std::string info_log = "";
const EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
if (!shader.parse(&resources, 100, false, messages))
{
info_log = std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog());
const EShMessages messages = static_cast<EShMessages>(
EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
if (!shader.parse(&resources, 100, false, messages)) {
info_log = std::string(shader.getInfoLog()) + "\n" +
std::string(shader.getInfoDebugLog());
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
throw std::runtime_error(info_log);
}
@ -47,24 +54,23 @@ Shader::compile_sources(const std::vector<std::string>& sources,
glslang::TProgram program;
program.addShader(&shader);
// Link program.
if (!program.link(messages))
{
info_log = std::string(program.getInfoLog()) + "\n" + std::string(program.getInfoDebugLog());
if (!program.link(messages)) {
info_log = std::string(program.getInfoLog()) + "\n" +
std::string(program.getInfoDebugLog());
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
throw std::runtime_error(info_log);
}
// Save any info log that was generated.
if (shader.getInfoLog())
{
info_log += std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog()) + "\n";
if (shader.getInfoLog()) {
info_log += std::string(shader.getInfoLog()) + "\n" +
std::string(shader.getInfoDebugLog()) + "\n";
KP_LOG_INFO("Kompute Shader Information: {}", info_log);
}
glslang::TIntermediate *intermediate = program.getIntermediate(language);
glslang::TIntermediate* intermediate = program.getIntermediate(language);
// Translate to SPIRV.
if (!intermediate)
{
if (!intermediate) {
info_log += "Failed to get shared intermediate code.\n";
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
throw std::runtime_error(info_log);
@ -74,8 +80,7 @@ Shader::compile_sources(const std::vector<std::string>& sources,
std::vector<std::uint32_t> spirv;
glslang::GlslangToSpv(*intermediate, spirv, &logger);
if (shader.getInfoLog())
{
if (shader.getInfoLog()) {
info_log += logger.getAllMessages() + "\n";
KP_LOG_DEBUG("Kompute Shader all result messages: {}", info_log);
}
@ -87,11 +92,17 @@ Shader::compile_sources(const std::vector<std::string>& sources,
}
/**
 * Compiles a single shader source by forwarding to compile_sources() with a
 * one-element source list and an empty file-name list.
 *
 * @param source The shader source text
 * @param entryPoint The entry point forwarded to compile_sources()
 * @param definitions The definitions forwarded to compile_sources()
 * @param resource The resource limits forwarded to compile_sources()
 * @return The value returned by compile_sources()
 */
std::vector<uint32_t>
Shader::compile_source(
  const std::string& source,
  const std::string& entryPoint,
  std::vector<std::pair<std::string, std::string>> definitions,
  const TBuiltInResource& resource)
{
    return compile_sources({ source },
                           std::vector<std::string>({}),
                           entryPoint,
                           definitions,
                           resource);
}
}

View file

@ -4,9 +4,9 @@
namespace kp {
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
const TensorTypes& tensorType)
std::shared_ptr<vk::Device> device,
const std::vector<float>& data,
const TensorTypes& tensorType)
{
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
data.size(),
@ -29,17 +29,16 @@ Tensor::~Tensor()
}
void
Tensor::rebuild(const std::vector<float>& data,
TensorTypes tensorType)
Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
{
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}",
data.size());
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
this->mData = data;
this->mTensorType = tensorType;
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild");
KP_LOG_DEBUG(
"Kompute Tensor destroying existing resources before rebuild");
this->destroy();
}
@ -77,10 +76,9 @@ Tensor::tensorType()
}
/**
 * Reports whether the tensor holds a device together with its primary
 * buffer and primary memory.
 *
 * @return true only when mDevice, mPrimaryBuffer and mPrimaryMemory all
 * evaluate to true
 */
bool
Tensor::isInit()
{
    return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
}
void
@ -105,17 +103,16 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);
this->recordCopyBuffer(commandBuffer,
copyFromTensor->mPrimaryBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
copyFromTensor->mPrimaryBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
}
void
Tensor::recordCopyFromStagingToDevice(
const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -123,17 +120,16 @@ Tensor::recordCopyFromStagingToDevice(
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
this->recordCopyBuffer(commandBuffer,
this->mStagingBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
this->mStagingBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
}
void
Tensor::recordCopyFromDeviceToStaging(
const vk::CommandBuffer& commandBuffer,
bool createBarrier)
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
@ -141,20 +137,20 @@ Tensor::recordCopyFromDeviceToStaging(
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
this->recordCopyBuffer(commandBuffer,
this->mPrimaryBuffer,
this->mStagingBuffer,
bufferSize,
copyRegion,
createBarrier);
this->mPrimaryBuffer,
this->mStagingBuffer,
bufferSize,
copyRegion,
createBarrier);
}
void
Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier)
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier)
{
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
@ -170,12 +166,11 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
}
void
Tensor::recordBufferMemoryBarrier(
const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask)
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask)
{
KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier");
@ -190,11 +185,11 @@ Tensor::recordBufferMemoryBarrier(
bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
commandBuffer.pipelineBarrier(srcStageMask,
dstStageMask,
vk::DependencyFlags(),
nullptr,
bufferMemoryBarrier,
nullptr);
dstStageMask,
vk::DependencyFlags(),
nullptr,
bufferMemoryBarrier,
nullptr);
}
vk::DescriptorBufferInfo
@ -449,7 +444,7 @@ Tensor::destroy()
if (this->mFreePrimaryBuffer) {
if (!this->mPrimaryBuffer) {
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
"but got null buffer");
"but got null buffer");
} else {
KP_LOG_DEBUG("Kompose Tensor destroying primary buffer");
this->mDevice->destroy(
@ -463,7 +458,7 @@ Tensor::destroy()
if (this->mFreeStagingBuffer) {
if (!this->mStagingBuffer) {
KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer "
"but got null buffer");
"but got null buffer");
} else {
KP_LOG_DEBUG("Kompose Tensor destroying staging buffer");
this->mDevice->destroy(
@ -477,7 +472,7 @@ Tensor::destroy()
if (this->mFreePrimaryMemory) {
if (!this->mPrimaryMemory) {
KP_LOG_WARN("Kompose Tensor expected to free primary memory but "
"got null memory");
"got null memory");
} else {
KP_LOG_DEBUG("Kompose Tensor freeing primary memory");
this->mDevice->freeMemory(
@ -491,7 +486,7 @@ Tensor::destroy()
if (this->mFreeStagingMemory) {
if (!this->mStagingMemory) {
KP_LOG_WARN("Kompose Tensor expected to free staging memory but "
"got null memory");
"got null memory");
} else {
KP_LOG_DEBUG("Kompose Tensor freeing staging memory");
this->mDevice->freeMemory(

View file

@ -12,8 +12,7 @@ namespace kp {
*/
class Algorithm
{
public:
public:
/**
* Default constructor for Algorithm
*
@ -21,12 +20,11 @@ public:
* @param commandBuffer The vulkan command buffer to bind the pipeline and
* shaders
*/
Algorithm(
std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
Algorithm(std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
/**
* Initialiser for the shader data provided to the algorithm as well as
@ -34,14 +32,13 @@ public:
*
* @param shaderFileData The bytes in spir-v format of the shader
* @tensorParams The Tensors to be used in the Algorithm / shader for
* processing
* @specializationConstants The specialization parameters to pass to the
* function
*/
void rebuild(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
/**
* Destructor for Algorithm which is responsible for freeing and destroying
@ -61,7 +58,8 @@ public:
void bindCore(const vk::CommandBuffer& commandBuffer);
void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants);
void bindPush(const vk::CommandBuffer& commandBuffer,
const Constants& pushConstants);
bool isInit();
@ -73,7 +71,7 @@ public:
void destroy();
private:
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::Device> mDevice;
std::vector<std::shared_ptr<Tensor>> mTensors;

View file

@ -60,12 +60,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_DEBUG(...)
#else
// Debug-level logging backend: Android log, the Python logger object, or
// stdout through fmt, selected at compile time.
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
// __android_log_print expects a printf-style C format string, so the
// already formatted message is passed as the argument of a fixed "%s".
#define KP_LOG_DEBUG(...)                                                      \
    ((void)__android_log_print(ANDROID_LOG_DEBUG,                              \
                               KOMPUTE_LOG_TAG,                                \
                               "%s",                                           \
                               fmt::format(__VA_ARGS__).c_str()))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_DEBUG(...)                                                      \
    fmt::print("[{} {}] [debug] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 1
@ -73,12 +80,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_INFO(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_INFO(...) \
((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#define KP_LOG_INFO(...) \
((void)__android_log_print( \
ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
// Fallback info logger, used when neither the Android nor the Python branch
// above applies: prints one line through fmt::print carrying the build
// date/time (__DATE__/__TIME__), the call site (__FILE__:__LINE__) and the
// fmt-formatted message. The severity label is "[info]" so the output can be
// told apart from KP_LOG_DEBUG lines.
#define KP_LOG_INFO(...)                                                       \
    fmt::print("[{} {}] [info] [{}:{}] {}\n",                                  \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 2
@ -86,12 +100,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_WARN(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_WARN(...) \
((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
// Android build: send warning-level messages to the Android log. The message
// is formatted by fmt first and then passed through a literal "%s" format, so
// the C variadic API receives a `const char*` (not a std::string) and the
// message text is never interpreted as a printf format string.
#define KP_LOG_WARN(...)                                                       \
    ((void)__android_log_print(ANDROID_LOG_WARN,                               \
                               KOMPUTE_LOG_TAG,                                \
                               "%s",                                           \
                               fmt::format(__VA_ARGS__).c_str()))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
// Fallback warning logger, used when neither the Android nor the Python branch
// above applies: prints one line through fmt::print carrying the build
// date/time (__DATE__/__TIME__), the call site (__FILE__:__LINE__) and the
// fmt-formatted message. The severity label is "[warn]" so the output can be
// told apart from KP_LOG_DEBUG lines.
#define KP_LOG_WARN(...)                                                       \
    fmt::print("[{} {}] [warn] [{}:{}] {}\n",                                  \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 3
@ -99,12 +120,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_ERROR(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_ERROR(...) \
((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
// Android build: send error-level messages to the Android log. The message is
// formatted by fmt first and then passed through a literal "%s" format, so the
// C variadic API receives a `const char*` (not a std::string) and the message
// text is never interpreted as a printf format string.
#define KP_LOG_ERROR(...)                                                      \
    ((void)__android_log_print(ANDROID_LOG_ERROR,                              \
                               KOMPUTE_LOG_TAG,                                \
                               "%s",                                           \
                               fmt::format(__VA_ARGS__).c_str()))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
// Fallback error logger, used when neither the Android nor the Python branch
// above applies: prints one line through fmt::print carrying the build
// date/time (__DATE__/__TIME__), the call site (__FILE__:__LINE__) and the
// fmt-formatted message. The severity label is "[error]" so the output can be
// told apart from KP_LOG_DEBUG lines.
#define KP_LOG_ERROR(...)                                                      \
    fmt::print("[{} {}] [error] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 4
#endif // KOMPUTE_SPDLOG_ENABLED

View file

@ -84,10 +84,10 @@ class Manager
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
std::shared_ptr<Algorithm> algorithm(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
void destroy();
void clear();
@ -119,7 +119,8 @@ class Manager
// Create functions
void createInstance();
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0);
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
uint32_t hysicalDeviceIndex = 0);
};
} // End namespace kp

View file

@ -9,7 +9,7 @@ namespace kp {
/**
* Container of operations that can be sent to GPU as batch
*/
class Sequence: public std::enable_shared_from_this<Sequence>
class Sequence : public std::enable_shared_from_this<Sequence>
{
public:
/**
@ -46,8 +46,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
* which allows for extensible configurations on initialisation.
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> record(
std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -56,14 +57,13 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->record(op);
}
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -72,8 +72,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->record(op);
}
@ -96,8 +96,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
*/
// TODO: Aim to have only a single function with tensors/algorithm
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -106,16 +106,16 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
// TODO: Aim to be able to handle errors when returning without throw except
// TODO: Aim to be able to handle errors when returning without throw
// except
return this->eval(op);
}
// Needed as otherwise can't use initialiser list
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -124,8 +124,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->eval(op);
}
@ -147,8 +147,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> evalAsync(
std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -157,15 +158,14 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->evalAsync(op);
}
// Needed as otherwise it's not possible to use initializer lists
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -174,8 +174,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->evalAsync(op);
}
@ -190,7 +190,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
/**
* Clear function clears all operations currently recorded and starts recording again.
* Clear function clears all operations currently recorded and starts
* recording again.
*/
void clear();
@ -217,7 +218,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
*/
bool isRecording();
bool isInit();
/**

View file

@ -4,9 +4,9 @@
#include <iostream>
#include <vector>
#include <SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Public/ShaderLang.h>
#include <SPIRV/GlslangToSpv.h>
#include "kompute/Core.hpp"
@ -16,161 +16,162 @@ namespace kp {
// Has been adopted from:
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
const TBuiltInResource defaultResource = {
/* .MaxLights = */ 0,
/* .MaxClipPlanes = */ 0,
/* .MaxTextureUnits = */ 0,
/* .MaxTextureCoords = */ 0,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 0,
/* .MaxCombinedTextureImageUnits = */ 0,
/* .MaxTextureImageUnits = */ 0,
/* .MaxFragmentUniformComponents = */ 0,
/* .MaxDrawBuffers = */ 0,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 0,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 0,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 0,
/* .MaxImageUnits = */ 0,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 0,
/* .MaxCombinedImageUniforms = */ 0,
/* .MaxGeometryTextureImageUnits = */ 0,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 0,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 0,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 0,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
/* .maxDualSourceDrawBuffersEXT = */ 1,
/* .MaxLights = */ 0,
/* .MaxClipPlanes = */ 0,
/* .MaxTextureUnits = */ 0,
/* .MaxTextureCoords = */ 0,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 0,
/* .MaxCombinedTextureImageUnits = */ 0,
/* .MaxTextureImageUnits = */ 0,
/* .MaxFragmentUniformComponents = */ 0,
/* .MaxDrawBuffers = */ 0,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 0,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 0,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 0,
/* .MaxImageUnits = */ 0,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 0,
/* .MaxCombinedImageUniforms = */ 0,
/* .MaxGeometryTextureImageUnits = */ 0,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 0,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 0,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 0,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
/* .maxDualSourceDrawBuffersEXT = */ 1,
/* .limits = */
{
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}
};
/* .limits = */ {
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}};
/**
Shader utility class with functions to compile and process glsl files.
*/
class Shader {
public:
class Shader
{
public:
/**
* Compile multiple sources with optional filenames. Currently this function
* uses the glslang C++ interface which is not thread safe so this function
* should not be called from multiple threads concurrently. If you have an
* online shader processing multithreading use-case that can't use offline
* online shader processing multithreading use-case that can't use offline
* compilation please open an issue.
*
* @param sources A list of raw glsl shaders in string format
* @param files A list of file names respective to each of the sources
* @param entryPoint The function name to use as entry point
* @param definitions List of pairs containing key value definitions
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
* @param resources The resource limits for the GLSL compiler (defaults to
* defaultResource)
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_sources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
std::vector<std::pair<std::string,std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
/**
* Compile a single glslang source from string value. Currently this function
* uses the glslang C++ interface which is not thread safe so this funciton
* should not be called from multiple threads concurrently. If you have a
* online shader processing multithreading use-case that can't use offline
* compilation please open an issue.
* Compile a single glslang source from string value. Currently this
* function uses the glslang C++ interface which is not thread safe so this
* function should not be called from multiple threads concurrently. If you
* have an online shader processing multithreading use-case that can't use
* offline compilation please open an issue.
*
* @param source An individual raw glsl shader in string format
* @param entryPoint The function name to use as entry point
* @param definitions List of pairs containing key value definitions
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
* @param resources The resource limits for the GLSL compiler (defaults to
* defaultResource)
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_source(
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string,std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
};
}
#endif // DKOMPUTE_DISABLE_SHADER_UTILS

View file

@ -54,7 +54,7 @@ class Tensor
* otherwise there is no need to copy from host memory.
*/
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
TensorTypes tensorType = TensorTypes::eDevice);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
@ -125,9 +125,8 @@ class Tensor
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromStagingToDevice(
const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
/**
* Records a copy from the internal device memory to the staging memory
@ -138,9 +137,8 @@ class Tensor
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromDeviceToStaging(
const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
@ -152,12 +150,11 @@ class Tensor
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordBufferMemoryBarrier(
const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Constructs a vulkan descriptor buffer info which can be used to specify
@ -205,11 +202,11 @@ class Tensor
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags);
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();

View file

@ -84,7 +84,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
for (uint32_t i = 0; i < numParallel; i++) {
inputsAsyncB.push_back(mgr.tensor(data));
algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv));
algosAsync.push_back(mgr.algorithm({ inputsAsyncB[i] }, spirv));
}
std::vector<std::shared_ptr<kp::Sequence>> sqs;
@ -160,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB}, spirv);
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({ tensorA }, spirv);
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({ tensorB }, spirv);
sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
sq2->evalAsync<kp::OpAlgoDispatch>(algo2);

View file

@ -27,12 +27,12 @@ TEST(TestDestroy, TestDestroyTensorSingle)
tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensorA }, spirv);
mgr.algorithm({ tensorA }, spirv);
mgr.sequence()
->record<kp::OpAlgoDispatch>(algo)
->eval()
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
->record<kp::OpAlgoDispatch>(algo)
->eval()
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
tensorA->destroy();
EXPECT_FALSE(tensorA->isInit());
@ -68,14 +68,14 @@ TEST(TestDestroy, TestDestroyTensorVector)
tensorA = mgr.tensor({ 1, 1, 1 });
tensorB = mgr.tensor({ 1, 1, 1 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({tensorA, tensorB}, spirv);
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({ tensorA, tensorB }, spirv);
mgr.sequence()
->record<kp::OpTensorSyncDevice>(algo->getTensors())
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(algo->getTensors())
->eval();
->record<kp::OpTensorSyncDevice>(algo->getTensors())
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(algo->getTensors())
->eval();
tensorA->destroy();
tensorB->destroy();
@ -109,12 +109,13 @@ TEST(TestDestroy, TestDestroySequenceSingle)
{
kp::Manager mgr;
tensorA = mgr.tensor({0, 0, 0});
tensorA = mgr.tensor({ 0, 0, 0 });
sq = mgr.sequence()
->record<kp::OpTensorSyncDevice>({tensorA})
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
->record<kp::OpTensorSyncLocal>({tensorA})
sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ tensorA })
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
sq->destroy();
@ -124,4 +125,3 @@ TEST(TestDestroy, TestDestroySequenceSingle)
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 1, 1, 1 }));
}

View file

@ -29,24 +29,27 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv,
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv +
kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len));
(uint32_t*)kp::shader_data::
test_shaders_glsl_test_logistic_regression_comp_spv,
(uint32_t*)(kp::shader_data::
test_shaders_glsl_test_logistic_regression_comp_spv +
kp::shader_data::
test_shaders_glsl_test_logistic_regression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0}));
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
params, spirv, kp::Workgroup({ 5 }), kp::Constants({ 5.0 }));
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algorithm)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algorithm)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
// Iterate across all expected iterations
for (size_t i = 0; i < ITERATIONS; i++) {
@ -90,37 +93,38 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
std::shared_ptr<kp::Tensor> y = mgr.tensor({ 0, 0, 0, 1, 1 });
std::shared_ptr<kp::Tensor> wIn = mgr.tensor(
{ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> wIn =
mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> bIn = mgr.tensor(
{ 0 },
kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> bIn =
mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> bOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::shared_ptr<kp::Tensor> lOut = mgr.tensor({ 0, 0, 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = { xI, xJ, y,
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
mgr.sequence()->record<kp::OpTensorSyncDevice>(params)->eval();
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::
shaders_glsl_logisticregression_comp_spv +
kp::shader_data::
shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({ 5.0 }));
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algorithm)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algorithm)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
// Iterate across all expected iterations
for (size_t i = 0; i < ITERATIONS; i++) {
@ -136,18 +140,18 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
bIn->mapDataIntoHostMemory();
}
// Based on the inputs the outputs should be at least:
// * wi < 0.01
// * wj > 1.0
// * b < 0
// TODO: Add EXPECT_DOUBLE_EQ instead
EXPECT_LT(wIn->data()[0], 0.01);
EXPECT_GT(wIn->data()[1], 1.0);
EXPECT_LT(bIn->data()[0], 0.0);
// Based on the inputs the outputs should be at least:
// * wi < 0.01
// * wj > 1.0
// * b < 0
// TODO: Add EXPECT_DOUBLE_EQ instead
EXPECT_LT(wIn->data()[0], 0.01);
EXPECT_GT(wIn->data()[1], 1.0);
EXPECT_LT(bIn->data()[0], 0.0);
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
wIn->data()[0],
wIn->data()[1],
bIn->data()[0]);
KP_LOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}",
wIn->data()[0],
wIn->data()[1],
bIn->data()[0]);
}
}

View file

@ -11,13 +11,14 @@ TEST(TestManager, EndToEndOpMultEvalFlow)
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params =
{ tensorLHS, tensorRHS, tensorOutput };
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
tensorOutput };
mgr.sequence()
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpMult>(params, mgr.algorithm())
->eval<kp::OpTensorSyncLocal>(params);
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpMult>(params, mgr.algorithm())
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
}
@ -30,14 +31,15 @@ TEST(TestManager, EndToEndOpMultSeqFlow)
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params =
{ tensorLHS, tensorRHS, tensorOutput };
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
tensorOutput };
mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpMult>(params, mgr.algorithm())
->record<kp::OpTensorSyncLocal>(params)
->eval();
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpMult>(params, mgr.algorithm())
->record<kp::OpTensorSyncLocal>(params)
->eval();
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
}
@ -50,8 +52,9 @@ TEST(TestManager, TestMultipleSequences)
std::shared_ptr<kp::Tensor> tensorRHS = mgr.tensor({ 2, 4, 6 });
std::shared_ptr<kp::Tensor> tensorOutput = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params =
{ tensorLHS, tensorRHS, tensorOutput };
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorLHS,
tensorRHS,
tensorOutput };
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
mgr.sequence()->eval<kp::OpMult>(params, mgr.algorithm());
@ -59,4 +62,3 @@ TEST(TestManager, TestMultipleSequences)
EXPECT_EQ(tensorOutput->data(), std::vector<float>({ 0, 4, 12 }));
}

View file

@ -3,9 +3,10 @@
#include "kompute/Kompute.hpp"
TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) {
TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality)
{
kp::Manager mgr;
kp::Manager mgr;
auto tensorInA = mgr.tensor({ 2., 2., 2. });
auto tensorInB = mgr.tensor({ 1., 2., 3. });
@ -38,21 +39,24 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) {
}
)");
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorInA, tensorInB, tensorOutA, tensorOutB};
std::vector<std::shared_ptr<kp::Tensor>> params = {
tensorInA, tensorInB, tensorOutA, tensorOutB
};
kp::Workgroup workgroup({3, 1, 1});
kp::Workgroup workgroup({ 3, 1, 1 });
kp::Constants specConsts({ 2 });
kp::Constants pushConstsA({ 2.0 });
kp::Constants pushConstsB({ 3.0 });
auto algorithm = mgr.algorithm(params, kp::Shader::compile_source(shader), workgroup, specConsts);
auto algorithm = mgr.algorithm(
params, kp::Shader::compile_source(shader), workgroup, specConsts);
// 3. Run operation with string shader synchronously
mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
->eval();
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
->eval();
auto sq = mgr.sequence();
sq->evalAsync<kp::OpTensorSyncLocal>(params);
@ -83,12 +87,12 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord)
{
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ tensorA })
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
->record<kp::OpTensorSyncDevice>({ tensorA })
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpAlgoDispatch>(mgr.algorithm({ tensorA }, spirv))
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
@ -111,29 +115,20 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords)
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ tensorA })
->eval();
mgr.sequence()->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
mgr.sequence()
->record<kp::OpAlgoDispatch>(algorithm)
->eval();
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
mgr.sequence()
->record<kp::OpAlgoDispatch>(algorithm)
->eval();
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
mgr.sequence()
->record<kp::OpAlgoDispatch>(algorithm)
->eval();
mgr.sequence()->record<kp::OpAlgoDispatch>(algorithm)->eval();
mgr.sequence()
->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
mgr.sequence()->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
}
@ -156,23 +151,20 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences)
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)
->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)
->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)
->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
}
@ -194,24 +186,20 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval)
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
std::shared_ptr<kp::Sequence> sq = mgr.sequence();
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)
->eval()
->eval()
->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
}
TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
{
std::shared_ptr<kp::Tensor> tensorA = nullptr;
@ -234,22 +222,18 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope)
std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm({ tensorA }, spirv);
sq = mgr.sequence();
sq->record<kp::OpTensorSyncDevice>({ tensorA })->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)
->eval()
->eval()
->eval();
sq->record<kp::OpAlgoDispatch>(algorithm)->eval()->eval()->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })
->eval();
sq->record<kp::OpTensorSyncLocal>({ tensorA })->eval();
}
}
EXPECT_EQ(tensorA->data(), std::vector<float>({ 3, 3, 3 }));
}

View file

@ -32,10 +32,9 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
mgr.sequence()
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
@ -48,27 +47,27 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 3, 4, 5 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::vector<uint32_t> spirv =
std::vector<uint32_t>(
(uint32_t*)kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv +
kp::shader_data::
test_shaders_glsl_test_op_custom_shader_comp_spv_len));
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)
kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
(uint32_t*)(kp::shader_data::
test_shaders_glsl_test_op_custom_shader_comp_spv +
kp::shader_data::
test_shaders_glsl_test_op_custom_shader_comp_spv_len));
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
mgr.sequence()
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
->eval<kp::OpTensorSyncDevice>(params)
->eval<kp::OpAlgoDispatch>(mgr.algorithm(params, spirv))
->eval<kp::OpTensorSyncLocal>(params);
EXPECT_EQ(tensorA->data(), std::vector<float>({ 0, 1, 2 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 3, 4, 5 }));
}
// TODO: Add support to read from file for shader
//TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile)
// TEST(TestOpAlgoCreate, ShaderCompiledDataFromFile)
//{
// kp::Manager mgr;
//
@ -77,7 +76,8 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
// mgr.rebuild({ tensorA, tensorB });
//
// mgr.evalOpDefault<kp::OpAlgoCreate>(
// { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv");
// { tensorA, tensorB },
// "test/shaders/glsl/test_op_custom_shader.comp.spv");
//
// mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorA, tensorB });
//

View file

@ -18,9 +18,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor)
EXPECT_TRUE(tensorB->isInit());
mgr.sequence()
->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB })
->eval<kp::OpTensorCopy>({ tensorA, tensorB })
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
->eval<kp::OpTensorSyncDevice>({ tensorA, tensorB })
->eval<kp::OpTensorCopy>({ tensorA, tensorB })
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
// Making sure the GPU holds the same data
EXPECT_EQ(tensorA->data(), tensorB->data());
@ -44,15 +44,14 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
EXPECT_TRUE(tensorC->isInit());
mgr.sequence()
->eval<kp::OpTensorSyncLocal>({tensorA, tensorB, tensorC})
->eval<kp::OpTensorCopy>({tensorA, tensorB, tensorC });
->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC })
->eval<kp::OpTensorCopy>({ tensorA, tensorB, tensorC });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->data(), tensorC->data());
// Making sure the GPU holds the same data
mgr.sequence()
->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
EXPECT_EQ(tensorA->data(), tensorB->data());
EXPECT_EQ(tensorA->data(), tensorC->data());
@ -67,8 +66,8 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(
testVecB, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB =
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
// Only calling sync on device type tensor
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
@ -93,8 +92,8 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
std::vector<float> testVecA{ 4, 5, 6 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(
testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
// Only calling sync on device type tensor
@ -120,17 +119,17 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
std::vector<float> testVecA{ 5, 6, 7 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(
testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(
testVecB, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB =
mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
mgr.sequence()
->eval<kp::OpTensorSyncDevice>({ tensorA })
->eval<kp::OpTensorCopy>({ tensorA, tensorB });
->eval<kp::OpTensorSyncDevice>({ tensorA })
->eval<kp::OpTensorCopy>({ tensorA, tensorB });
EXPECT_EQ(tensorA->data(), tensorB->data());
@ -146,8 +145,8 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
std::vector<float> testVecA{ 6, 7, 8 };
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(
testVecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorA =
mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
EXPECT_TRUE(tensorA->isInit());

View file

@ -32,16 +32,18 @@ TEST(TestPushConstants, TestTwoConstants)
std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({tensor}, spirv, kp::Workgroup({1}));
mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }));
sq = mgr.sequence()
->record<kp::OpTensorSyncDevice>({tensor})
->record<kp::OpAlgoDispatch>(algo, kp::Constants{0.1, 0.2, 0.3})
->record<kp::OpAlgoDispatch>(algo, kp::Constants{0.3, 0.2, 0.1})
->record<kp::OpTensorSyncLocal>({tensor})
->eval();
->record<kp::OpTensorSyncDevice>({ tensor })
->record<kp::OpAlgoDispatch>(algo,
kp::Constants{ 0.1, 0.2, 0.3 })
->record<kp::OpAlgoDispatch>(algo,
kp::Constants{ 0.3, 0.2, 0.1 })
->record<kp::OpTensorSyncLocal>({ tensor })
->eval();
EXPECT_EQ(tensor->data(), kp::Constants({0.4, 0.4, 0.4}));
EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
}
}
}

View file

@ -17,4 +17,3 @@ TEST(TestSequence, SequenceDestructorViaManager)
EXPECT_FALSE(sq->isInit());
}

View file

@ -28,17 +28,19 @@ TEST(TestSpecializationConstants, TestTwoConstants)
std::shared_ptr<kp::Tensor> tensorA = mgr.tensor({ 0, 0, 0 });
std::shared_ptr<kp::Tensor> tensorB = mgr.tensor({ 0, 0, 0 });
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorA, tensorB};
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
tensorB };
kp::Constants spec = kp::Constants({5.0, 0.3});
kp::Constants spec = kp::Constants({ 5.0, 0.3 });
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv, {}, spec);
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm(params, spirv, {}, spec);
sq = mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
EXPECT_EQ(tensorA->data(), std::vector<float>({ 5, 5, 5 }));
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0.3, 0.3, 0.3 }));

View file

@ -11,4 +11,3 @@ TEST(TestTensor, ConstructorData)
EXPECT_EQ(tensor->size(), vec.size());
EXPECT_EQ(tensor->data(), vec);
}

View file

@ -18,16 +18,21 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
tensorA = mgr.tensor(std::vector<float>(16 * 8));
tensorB = mgr.tensor(std::vector<float>(16 * 8));
std::vector<std::shared_ptr<kp::Tensor>> params = {tensorA, tensorB};
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
tensorB };
std::vector<uint32_t> spirv(
(uint32_t*)kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv +
kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv_len));
(uint32_t*)
kp::shader_data::test_shaders_glsl_test_workgroup_comp_spv,
(uint32_t*)(kp::shader_data::
test_shaders_glsl_test_workgroup_comp_spv +
kp::shader_data::
test_shaders_glsl_test_workgroup_comp_spv_len));
kp::Workgroup workgroup = {16, 8, 1};
kp::Workgroup workgroup = { 16, 8, 1 };
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(params, spirv, workgroup);
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm(params, spirv, workgroup);
sq = mgr.sequence();
sq->record<kp::OpTensorSyncDevice>(params);
@ -37,11 +42,26 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
}
}
std::vector<float> expectedA = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15};
std::vector<float> expectedA = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15
};
std::vector<float> expectedB = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 };
std::vector<float> expectedB = {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1,
2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
};
EXPECT_EQ(tensorA->data(), expectedA);
EXPECT_EQ(tensorB->data(), expectedB);
}