Reformat
This commit is contained in:
parent
75315db943
commit
63e220a8a4
26 changed files with 667 additions and 624 deletions
|
|
@ -4,23 +4,25 @@
|
|||
|
||||
namespace kp {
|
||||
|
||||
Algorithm::Algorithm(
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
Algorithm::Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||
|
||||
this->mDevice = device;
|
||||
|
||||
if (tensors.size() && spirv.size()) {
|
||||
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and spirv size: {}", tensors.size(), spirv.size());
|
||||
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and "
|
||||
"spirv size: {}",
|
||||
tensors.size(),
|
||||
spirv.size());
|
||||
this->rebuild(tensors, spirv, workgroup, specializationConstants);
|
||||
}
|
||||
else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or spirv so not rebuilding vulkan components");
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or "
|
||||
"spirv so not rebuilding vulkan components");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -32,20 +34,21 @@ Algorithm::~Algorithm()
|
|||
}
|
||||
|
||||
void
|
||||
Algorithm::rebuild(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
Algorithm::rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm rebuild started");
|
||||
|
||||
this->mTensors = tensors;
|
||||
this->mSpirv = spirv;
|
||||
this->mSpecializationConstants = specializationConstants;
|
||||
this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
this->setWorkgroup(workgroup,
|
||||
this->mTensors.size() ? this->mTensors[0]->size() : 1);
|
||||
|
||||
// Descriptor pool is created first so if available then destroy all before rebuild
|
||||
// Descriptor pool is created first so if available then destroy all before
|
||||
// rebuild
|
||||
if (this->isInit()) {
|
||||
this->destroy();
|
||||
}
|
||||
|
|
@ -56,22 +59,20 @@ Algorithm::rebuild(
|
|||
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit() {
|
||||
return this->mPipeline &&
|
||||
this->mPipelineCache &&
|
||||
this->mPipelineLayout &&
|
||||
this->mDescriptorPool &&
|
||||
this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout &&
|
||||
this->mShaderModule;
|
||||
Algorithm::isInit()
|
||||
{
|
||||
return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
|
||||
this->mDescriptorPool && this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout && this->mShaderModule;
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::destroy() {
|
||||
Algorithm::destroy()
|
||||
{
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Algorithm destroy function reached with null Device pointer");
|
||||
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
|
||||
"Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +80,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
|
||||
if (!this->mPipeline) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
"pipeline but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipeline,
|
||||
|
|
@ -91,7 +92,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
||||
if (!this->mPipelineCache) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
"pipeline cache but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineCache,
|
||||
|
|
@ -103,7 +104,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||
if (!this->mPipelineLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
"pipeline layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineLayout,
|
||||
|
|
@ -115,7 +116,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
|
||||
if (!this->mShaderModule) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
"module but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mShaderModule,
|
||||
|
|
@ -123,10 +124,10 @@ Algorithm::destroy() {
|
|||
this->mShaderModule = nullptr;
|
||||
}
|
||||
|
||||
// We don't call freeDescriptorSet as the descriptor pool is not created with
|
||||
// VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at
|
||||
// We don't call freeDescriptorSet as the descriptor pool is not created
|
||||
// with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at
|
||||
// (https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VUID-vkFreeDescriptorSets-descriptorPool-00312))
|
||||
//if (this->mFreeDescriptorSet && this->mDescriptorSet) {
|
||||
// if (this->mFreeDescriptorSet && this->mDescriptorSet) {
|
||||
// KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set");
|
||||
// if (!this->mDescriptorSet) {
|
||||
// KP_LOG_WARN(
|
||||
|
|
@ -141,7 +142,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
|
||||
if (!this->mDescriptorSetLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
"descriptor set layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorSetLayout,
|
||||
|
|
@ -153,7 +154,7 @@ Algorithm::destroy() {
|
|||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool");
|
||||
if (!this->mDescriptorPool) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor pool but it is null");
|
||||
"descriptor pool but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorPool,
|
||||
|
|
@ -246,10 +247,10 @@ Algorithm::createShaderModule()
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(
|
||||
vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) * this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) *
|
||||
this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
this->mSpirv.size());
|
||||
|
|
@ -281,14 +282,14 @@ Algorithm::createPipeline()
|
|||
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstants.size(); i++) {
|
||||
vk::SpecializationMapEntry specializationEntry(
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(sizeof(float) * i),
|
||||
sizeof(float));
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(sizeof(float) * i),
|
||||
sizeof(float));
|
||||
|
||||
specializationEntries.push_back(specializationEntry);
|
||||
}
|
||||
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// specialization container by using "transferDataOwnership"
|
||||
vk::SpecializationInfo specializationInfo(
|
||||
static_cast<uint32_t>(specializationEntries.size()),
|
||||
|
|
@ -338,7 +339,8 @@ Algorithm::createPipeline()
|
|||
// TODO: Update to consistent
|
||||
// this->mPipeline = std::make_shared<vk::Pipeline>();
|
||||
// this->mDevice->createComputePipelines(
|
||||
// *this->mPipelineCache, 1, &pipelineInfo, nullptr, this->mPipeline.get());
|
||||
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
|
||||
// this->mPipeline.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
|
||||
}
|
||||
|
|
@ -349,29 +351,31 @@ Algorithm::bindCore(const vk::CommandBuffer& commandBuffer)
|
|||
KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
|
||||
|
||||
commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
*this->mPipeline);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
|
||||
|
||||
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants)
|
||||
Algorithm::bindPush(const vk::CommandBuffer& commandBuffer,
|
||||
const Constants& pushConstants)
|
||||
{
|
||||
if (pushConstants.size()) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}", pushConstants.size());
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants size: {}",
|
||||
pushConstants.size());
|
||||
|
||||
commandBuffer.pushConstants(*this->mPipelineLayout,
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
pushConstants.size() * sizeof(float),
|
||||
pushConstants.data());
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
pushConstants.size() * sizeof(float),
|
||||
pushConstants.data());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -380,11 +384,13 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
|
|||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
|
||||
|
||||
commandBuffer.dispatch(this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
commandBuffer.dispatch(
|
||||
this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
|
||||
|
||||
|
|
@ -393,11 +399,9 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
|
|||
if (workgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mWorkgroup = {
|
||||
workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1
|
||||
};
|
||||
this->mWorkgroup = { workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1 };
|
||||
} else {
|
||||
this->mWorkgroup = { minSize, 1, 1 };
|
||||
}
|
||||
|
|
@ -409,17 +413,20 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
|
|||
}
|
||||
|
||||
const Workgroup&
|
||||
Algorithm::getWorkgroup() {
|
||||
Algorithm::getWorkgroup()
|
||||
{
|
||||
return this->mWorkgroup;
|
||||
}
|
||||
|
||||
const Constants&
|
||||
Algorithm::getSpecializationConstants() {
|
||||
Algorithm::getSpecializationConstants()
|
||||
{
|
||||
return this->mSpecializationConstants;
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors() {
|
||||
Algorithm::getTensors()
|
||||
{
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,8 @@ Manager::~Manager()
|
|||
}
|
||||
|
||||
void
|
||||
Manager::destroy() {
|
||||
Manager::destroy()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager destroy() started");
|
||||
|
||||
|
|
@ -78,7 +79,8 @@ Manager::destroy() {
|
|||
|
||||
if (this->mManageResources && this->mManagedAlgorithms.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm : this->mManagedAlgorithms) {
|
||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
|
||||
this->mManagedAlgorithms) {
|
||||
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
|
||||
algorithm->destroy();
|
||||
}
|
||||
|
|
@ -214,31 +216,31 @@ Manager::createInstance()
|
|||
}
|
||||
|
||||
void
|
||||
Manager::clear() {
|
||||
Manager::clear()
|
||||
{
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.erase(
|
||||
std::remove_if(
|
||||
begin(this->mManagedTensors),
|
||||
end(this->mManagedTensors),
|
||||
[](std::weak_ptr<Tensor> t) {return t.expired();}),
|
||||
end(this->mManagedTensors));
|
||||
std::remove_if(begin(this->mManagedTensors),
|
||||
end(this->mManagedTensors),
|
||||
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
|
||||
end(this->mManagedTensors));
|
||||
this->mManagedAlgorithms.erase(
|
||||
std::remove_if(
|
||||
begin(this->mManagedAlgorithms),
|
||||
end(this->mManagedAlgorithms),
|
||||
[](std::weak_ptr<Algorithm> t) {return t.expired();}),
|
||||
end(this->mManagedAlgorithms));
|
||||
std::remove_if(
|
||||
begin(this->mManagedAlgorithms),
|
||||
end(this->mManagedAlgorithms),
|
||||
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
|
||||
end(this->mManagedAlgorithms));
|
||||
this->mManagedSequences.erase(
|
||||
std::remove_if(
|
||||
begin(this->mManagedSequences),
|
||||
end(this->mManagedSequences),
|
||||
[](std::weak_ptr<Sequence> t) {return t.expired();}),
|
||||
end(this->mManagedSequences));
|
||||
std::remove_if(begin(this->mManagedSequences),
|
||||
end(this->mManagedSequences),
|
||||
[](std::weak_ptr<Sequence> t) { return t.expired(); }),
|
||||
end(this->mManagedSequences));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t physicalDeviceIndex)
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
||||
uint32_t physicalDeviceIndex)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating Device");
|
||||
|
|
@ -256,8 +258,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t
|
|||
std::vector<vk::PhysicalDevice> physicalDevices =
|
||||
this->mInstance->enumeratePhysicalDevices();
|
||||
|
||||
vk::PhysicalDevice physicalDevice =
|
||||
physicalDevices[physicalDeviceIndex];
|
||||
vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];
|
||||
|
||||
this->mPhysicalDevice =
|
||||
std::make_shared<vk::PhysicalDevice>(physicalDevice);
|
||||
|
|
@ -342,16 +343,14 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices, uint32_t
|
|||
}
|
||||
|
||||
std::shared_ptr<Tensor>
|
||||
Manager::tensor(
|
||||
const std::vector<float>& data,
|
||||
Tensor::TensorTypes tensorType)
|
||||
Manager::tensor(const std::vector<float>& data, Tensor::TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
|
||||
|
||||
std::shared_ptr<Tensor> tensor{
|
||||
new kp::Tensor(this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
std::shared_ptr<Tensor> tensor{ new kp::Tensor(
|
||||
this->mPhysicalDevice, this->mDevice, data, tensorType) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.push_back(tensor);
|
||||
}
|
||||
|
||||
|
|
@ -359,23 +358,18 @@ Manager::tensor(
|
|||
}
|
||||
|
||||
std::shared_ptr<Algorithm>
|
||||
Manager::algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants) {
|
||||
Manager::algorithm(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const std::vector<uint32_t>& spirv,
|
||||
const Workgroup& workgroup,
|
||||
const Constants& specializationConstants)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm{
|
||||
new kp::Algorithm(
|
||||
this->mDevice,
|
||||
tensors,
|
||||
spirv,
|
||||
workgroup,
|
||||
specializationConstants)};
|
||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||
this->mDevice, tensors, spirv, workgroup, specializationConstants) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
if (this->mManageResources) {
|
||||
this->mManagedAlgorithms.push_back(algorithm);
|
||||
}
|
||||
|
||||
|
|
@ -385,16 +379,15 @@ Manager::algorithm(
|
|||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(uint32_t queueIndex)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}",
|
||||
queueIndex);
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq{
|
||||
new kp::Sequence(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]) };
|
||||
std::shared_ptr<Sequence> sq{ new kp::Sequence(
|
||||
this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex]) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
if (this->mManageResources) {
|
||||
this->mManagedSequences.push_back(sq);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const kp::Constants& pushConstants)
|
||||
const kp::Constants& pushConstants)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
|
|
@ -24,7 +24,8 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
|
|||
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mAlgorithm->getTensors()) {
|
||||
for (const std::shared_ptr<Tensor>& tensor :
|
||||
this->mAlgorithm->getTensors()) {
|
||||
tensor->recordBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
|
|
|
|||
|
|
@ -30,8 +30,8 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
|
|||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(
|
||||
commandBuffer, false);
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer,
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,8 +30,8 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
|
|||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(
|
||||
commandBuffer, true);
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer,
|
||||
true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,8 @@ Sequence::begin()
|
|||
}
|
||||
|
||||
if (this->isRunning()) {
|
||||
throw std::runtime_error("Kompute Sequence begin called when sequence still running");
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence begin called when sequence still running");
|
||||
}
|
||||
|
||||
KP_LOG_INFO("Kompute Sequence command now started recording");
|
||||
|
|
@ -53,8 +54,7 @@ Sequence::end()
|
|||
if (!this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence end called when not recording");
|
||||
return;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Sequence command recording END");
|
||||
this->mCommandBuffer->end();
|
||||
this->mRecording = false;
|
||||
|
|
@ -62,7 +62,8 @@ Sequence::end()
|
|||
}
|
||||
|
||||
void
|
||||
Sequence::clear() {
|
||||
Sequence::clear()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling clear");
|
||||
this->end();
|
||||
}
|
||||
|
|
@ -76,7 +77,8 @@ Sequence::eval()
|
|||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval(std::shared_ptr<OpBase> op) {
|
||||
Sequence::eval(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
this->clear();
|
||||
return this->record(op)->eval();
|
||||
}
|
||||
|
|
@ -89,8 +91,9 @@ Sequence::evalAsync()
|
|||
}
|
||||
|
||||
if (this->mIsRunning) {
|
||||
throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
}
|
||||
|
||||
this->mIsRunning = true;
|
||||
|
|
@ -137,7 +140,8 @@ Sequence::evalAwait(uint64_t waitFor)
|
|||
this->mIsRunning = false;
|
||||
|
||||
if (result == vk::Result::eTimeout) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}", waitFor);
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}",
|
||||
waitFor);
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
|
|
@ -161,11 +165,10 @@ Sequence::isRecording()
|
|||
}
|
||||
|
||||
bool
|
||||
Sequence::isInit() {
|
||||
return this->mDevice &&
|
||||
this->mCommandPool &&
|
||||
this->mCommandBuffer &&
|
||||
this->mComputeQueue;
|
||||
Sequence::isInit()
|
||||
{
|
||||
return this->mDevice && this->mCommandPool && this->mCommandBuffer &&
|
||||
this->mComputeQueue;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -175,16 +178,15 @@ Sequence::destroy()
|
|||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute Sequence destroy called "
|
||||
"with null Device pointer");
|
||||
"with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeCommandBuffer) {
|
||||
KP_LOG_INFO("Freeing CommandBuffer");
|
||||
if (!this->mCommandBuffer) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->freeCommandBuffers(
|
||||
|
|
@ -199,9 +201,8 @@ Sequence::destroy()
|
|||
if (this->mFreeCommandPool) {
|
||||
KP_LOG_INFO("Destroying CommandPool");
|
||||
if (this->mCommandPool == nullptr) {
|
||||
KP_LOG_WARN(
|
||||
"Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -228,7 +229,6 @@ Sequence::destroy()
|
|||
if (this->mComputeQueue) {
|
||||
this->mComputeQueue = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
|
|
|
|||
|
|
@ -5,11 +5,13 @@
|
|||
namespace kp {
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_sources(const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string,std::string>> definitions,
|
||||
const TBuiltInResource& resources) {
|
||||
Shader::compile_sources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string, std::string>> definitions,
|
||||
const TBuiltInResource& resources)
|
||||
{
|
||||
|
||||
// Initialize glslang library.
|
||||
glslang::InitializeProcess();
|
||||
|
|
@ -18,27 +20,32 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
const EShLanguage language = EShLangCompute;
|
||||
glslang::TShader shader(language);
|
||||
|
||||
std::vector<const char*> filesCStr(files.size()), sourcesCStr(sources.size());
|
||||
for (size_t i = 0; i < sources.size(); i++) sourcesCStr[i] = sources[i].c_str();
|
||||
std::vector<const char*> filesCStr(files.size()),
|
||||
sourcesCStr(sources.size());
|
||||
for (size_t i = 0; i < sources.size(); i++)
|
||||
sourcesCStr[i] = sources[i].c_str();
|
||||
|
||||
if (files.size() > 1) {
|
||||
assert(files.size() == sources.size());
|
||||
for (size_t i = 0; i < files.size(); i++) filesCStr[i] = files[i].c_str();
|
||||
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
|
||||
}
|
||||
else {
|
||||
filesCStr = {""};
|
||||
shader.setStringsWithLengthsAndNames(sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
|
||||
for (size_t i = 0; i < files.size(); i++)
|
||||
filesCStr[i] = files[i].c_str();
|
||||
shader.setStringsWithLengthsAndNames(
|
||||
sourcesCStr.data(), nullptr, filesCStr.data(), filesCStr.size());
|
||||
} else {
|
||||
filesCStr = { "" };
|
||||
shader.setStringsWithLengthsAndNames(
|
||||
sourcesCStr.data(), nullptr, filesCStr.data(), sourcesCStr.size());
|
||||
}
|
||||
|
||||
shader.setEntryPoint(entryPoint.c_str());
|
||||
shader.setSourceEntryPoint(entryPoint.c_str());
|
||||
|
||||
std::string info_log = "";
|
||||
const EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
|
||||
if (!shader.parse(&resources, 100, false, messages))
|
||||
{
|
||||
info_log = std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog());
|
||||
const EShMessages messages = static_cast<EShMessages>(
|
||||
EShMsgDefault | EShMsgVulkanRules | EShMsgSpvRules);
|
||||
if (!shader.parse(&resources, 100, false, messages)) {
|
||||
info_log = std::string(shader.getInfoLog()) + "\n" +
|
||||
std::string(shader.getInfoDebugLog());
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
}
|
||||
|
|
@ -47,24 +54,23 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
glslang::TProgram program;
|
||||
program.addShader(&shader);
|
||||
// Link program.
|
||||
if (!program.link(messages))
|
||||
{
|
||||
info_log = std::string(program.getInfoLog()) + "\n" + std::string(program.getInfoDebugLog());
|
||||
if (!program.link(messages)) {
|
||||
info_log = std::string(program.getInfoLog()) + "\n" +
|
||||
std::string(program.getInfoDebugLog());
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
}
|
||||
|
||||
// Save any info log that was generated.
|
||||
if (shader.getInfoLog())
|
||||
{
|
||||
info_log += std::string(shader.getInfoLog()) + "\n" + std::string(shader.getInfoDebugLog()) + "\n";
|
||||
if (shader.getInfoLog()) {
|
||||
info_log += std::string(shader.getInfoLog()) + "\n" +
|
||||
std::string(shader.getInfoDebugLog()) + "\n";
|
||||
KP_LOG_INFO("Kompute Shader Information: {}", info_log);
|
||||
}
|
||||
|
||||
glslang::TIntermediate *intermediate = program.getIntermediate(language);
|
||||
glslang::TIntermediate* intermediate = program.getIntermediate(language);
|
||||
// Translate to SPIRV.
|
||||
if (!intermediate)
|
||||
{
|
||||
if (!intermediate) {
|
||||
info_log += "Failed to get shared intermediate code.\n";
|
||||
KP_LOG_ERROR("Kompute Shader Error: {}", info_log);
|
||||
throw std::runtime_error(info_log);
|
||||
|
|
@ -74,8 +80,7 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
std::vector<std::uint32_t> spirv;
|
||||
glslang::GlslangToSpv(*intermediate, spirv, &logger);
|
||||
|
||||
if (shader.getInfoLog())
|
||||
{
|
||||
if (shader.getInfoLog()) {
|
||||
info_log += logger.getAllMessages() + "\n";
|
||||
KP_LOG_DEBUG("Kompute Shader all result messages: {}", info_log);
|
||||
}
|
||||
|
|
@ -87,11 +92,17 @@ Shader::compile_sources(const std::vector<std::string>& sources,
|
|||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
Shader::compile_source(const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string,std::string>> definitions,
|
||||
const TBuiltInResource& resource) {
|
||||
return compile_sources({source}, std::vector<std::string>({}), entryPoint, definitions, resource);
|
||||
Shader::compile_source(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint,
|
||||
std::vector<std::pair<std::string, std::string>> definitions,
|
||||
const TBuiltInResource& resource)
|
||||
{
|
||||
return compile_sources({ source },
|
||||
std::vector<std::string>({}),
|
||||
entryPoint,
|
||||
definitions,
|
||||
resource);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
101
src/Tensor.cpp
101
src/Tensor.cpp
|
|
@ -4,9 +4,9 @@
|
|||
namespace kp {
|
||||
|
||||
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType)
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<float>& data,
|
||||
const TensorTypes& tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
|
||||
data.size(),
|
||||
|
|
@ -29,17 +29,16 @@ Tensor::~Tensor()
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType)
|
||||
Tensor::rebuild(const std::vector<float>& data, TensorTypes tensorType)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}",
|
||||
data.size());
|
||||
KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", data.size());
|
||||
|
||||
this->mData = data;
|
||||
this->mTensorType = tensorType;
|
||||
|
||||
if (this->mPrimaryBuffer || this->mPrimaryMemory) {
|
||||
KP_LOG_DEBUG("Kompute Tensor destroying existing resources before rebuild");
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Tensor destroying existing resources before rebuild");
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
|
|
@ -77,10 +76,9 @@ Tensor::tensorType()
|
|||
}
|
||||
|
||||
bool
|
||||
Tensor::isInit() {
|
||||
return this->mDevice &&
|
||||
this->mPrimaryBuffer &&
|
||||
this->mPrimaryMemory;
|
||||
Tensor::isInit()
|
||||
{
|
||||
return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -105,17 +103,16 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
|||
KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);
|
||||
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
copyFromTensor->mPrimaryBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
copyFromTensor->mPrimaryBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromStagingToDevice(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
|
@ -123,17 +120,16 @@ Tensor::recordCopyFromStagingToDevice(
|
|||
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
this->mStagingBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
this->mStagingBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyFromDeviceToStaging(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier)
|
||||
{
|
||||
vk::DeviceSize bufferSize(this->memorySize());
|
||||
vk::BufferCopy copyRegion(0, 0, bufferSize);
|
||||
|
|
@ -141,20 +137,20 @@ Tensor::recordCopyFromDeviceToStaging(
|
|||
KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
|
||||
|
||||
this->recordCopyBuffer(commandBuffer,
|
||||
this->mPrimaryBuffer,
|
||||
this->mStagingBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
this->mPrimaryBuffer,
|
||||
this->mStagingBuffer,
|
||||
bufferSize,
|
||||
copyRegion,
|
||||
createBarrier);
|
||||
}
|
||||
|
||||
void
|
||||
Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier)
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier)
|
||||
{
|
||||
|
||||
commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
|
||||
|
|
@ -170,12 +166,11 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
|||
}
|
||||
|
||||
void
|
||||
Tensor::recordBufferMemoryBarrier(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask)
|
||||
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier");
|
||||
|
||||
|
|
@ -190,11 +185,11 @@ Tensor::recordBufferMemoryBarrier(
|
|||
bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
|
||||
commandBuffer.pipelineBarrier(srcStageMask,
|
||||
dstStageMask,
|
||||
vk::DependencyFlags(),
|
||||
nullptr,
|
||||
bufferMemoryBarrier,
|
||||
nullptr);
|
||||
dstStageMask,
|
||||
vk::DependencyFlags(),
|
||||
nullptr,
|
||||
bufferMemoryBarrier,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
vk::DescriptorBufferInfo
|
||||
|
|
@ -449,7 +444,7 @@ Tensor::destroy()
|
|||
if (this->mFreePrimaryBuffer) {
|
||||
if (!this->mPrimaryBuffer) {
|
||||
KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer "
|
||||
"but got null buffer");
|
||||
"but got null buffer");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor destroying primary buffer");
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -463,7 +458,7 @@ Tensor::destroy()
|
|||
if (this->mFreeStagingBuffer) {
|
||||
if (!this->mStagingBuffer) {
|
||||
KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer "
|
||||
"but got null buffer");
|
||||
"but got null buffer");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor destroying staging buffer");
|
||||
this->mDevice->destroy(
|
||||
|
|
@ -477,7 +472,7 @@ Tensor::destroy()
|
|||
if (this->mFreePrimaryMemory) {
|
||||
if (!this->mPrimaryMemory) {
|
||||
KP_LOG_WARN("Kompose Tensor expected to free primary memory but "
|
||||
"got null memory");
|
||||
"got null memory");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor freeing primary memory");
|
||||
this->mDevice->freeMemory(
|
||||
|
|
@ -491,7 +486,7 @@ Tensor::destroy()
|
|||
if (this->mFreeStagingMemory) {
|
||||
if (!this->mStagingMemory) {
|
||||
KP_LOG_WARN("Kompose Tensor expected to free staging memory but "
|
||||
"got null memory");
|
||||
"got null memory");
|
||||
} else {
|
||||
KP_LOG_DEBUG("Kompose Tensor freeing staging memory");
|
||||
this->mDevice->freeMemory(
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@ namespace kp {
|
|||
*/
|
||||
class Algorithm
|
||||
{
|
||||
public:
|
||||
|
||||
public:
|
||||
/**
|
||||
* Default constructor for Algorithm
|
||||
*
|
||||
|
|
@ -21,12 +20,11 @@ public:
|
|||
* @param commandBuffer The vulkan command buffer to bind the pipeline and
|
||||
* shaders
|
||||
*/
|
||||
Algorithm(
|
||||
std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
Algorithm(std::shared_ptr<vk::Device> device,
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Initialiser for the shader data provided to the algorithm as well as
|
||||
|
|
@ -34,14 +32,13 @@ public:
|
|||
*
|
||||
* @param shaderFileData The bytes in spir-v format of the shader
|
||||
* @tensorParams The Tensors to be used in the Algorithm / shader for
|
||||
* @specalizationInstalces The specialization parameters to pass to the function
|
||||
* processing
|
||||
* @specalizationInstalces The specialization parameters to pass to the
|
||||
* function processing
|
||||
*/
|
||||
void rebuild(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
/**
|
||||
* Destructor for Algorithm which is responsible for freeing and destroying
|
||||
|
|
@ -61,7 +58,8 @@ public:
|
|||
|
||||
void bindCore(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants);
|
||||
void bindPush(const vk::CommandBuffer& commandBuffer,
|
||||
const Constants& pushConstants);
|
||||
|
||||
bool isInit();
|
||||
|
||||
|
|
@ -73,7 +71,7 @@ public:
|
|||
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
|
|
|
|||
|
|
@ -60,12 +60,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_DEBUG(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
((void)__android_log_print( \
|
||||
ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 1
|
||||
|
||||
|
|
@ -73,12 +80,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_INFO(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_INFO(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_INFO(...) \
|
||||
((void)__android_log_print( \
|
||||
ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_INFO(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 2
|
||||
|
||||
|
|
@ -86,12 +100,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_WARN(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_WARN(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_WARN(...) \
|
||||
((void)__android_log_print( \
|
||||
ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_WARN(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 3
|
||||
|
||||
|
|
@ -99,12 +120,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
|
|||
#define KP_LOG_ERROR(...)
|
||||
#else
|
||||
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
|
||||
#define KP_LOG_ERROR(...) \
|
||||
((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#define KP_LOG_ERROR(...) \
|
||||
((void)__android_log_print( \
|
||||
ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
|
||||
#elif defined(KOMPUTE_BUILD_PYTHON)
|
||||
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
|
||||
#define KP_LOG_ERROR(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#endif // SPDLOG_ACTIVE_LEVEL > 4
|
||||
#endif // KOMPUTE_SPDLOG_ENABLED
|
||||
|
|
|
|||
|
|
@ -84,10 +84,10 @@ class Manager
|
|||
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
|
||||
|
||||
std::shared_ptr<Algorithm> algorithm(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||
const std::vector<uint32_t>& spirv = {},
|
||||
const Workgroup& workgroup = {},
|
||||
const Constants& specializationConstants = {});
|
||||
|
||||
void destroy();
|
||||
void clear();
|
||||
|
|
@ -119,7 +119,8 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0);
|
||||
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
|
||||
uint32_t hysicalDeviceIndex = 0);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ namespace kp {
|
|||
/**
|
||||
* Container of operations that can be sent to GPU as batch
|
||||
*/
|
||||
class Sequence: public std::enable_shared_from_this<Sequence>
|
||||
class Sequence : public std::enable_shared_from_this<Sequence>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
|
@ -46,8 +46,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* which allows for extensible configurations on initialisation.
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> record(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -56,14 +57,13 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -72,8 +72,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->record(op);
|
||||
}
|
||||
|
|
@ -96,8 +96,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
// TODO: Aim to have only a single function with tensors/algorithm
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -106,16 +106,16 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
// TODO: Aim to be able to handle errors when returning without throw except
|
||||
// TODO: Aim to be able to handle errors when returning without throw
|
||||
// except
|
||||
return this->eval(op);
|
||||
}
|
||||
// Needed as otherwise can't use initialiser list
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -124,8 +124,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->eval(op);
|
||||
}
|
||||
|
|
@ -147,8 +147,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> evalAsync(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -157,15 +158,14 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(tensors, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
// Needed as otherwise it's not possible to use initializer lists
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence>
|
||||
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
|
||||
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
|
|
@ -174,8 +174,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
"OpBase derived classes");
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
|
||||
std::shared_ptr<T> op{
|
||||
new T(algorithm, std::forward<TArgs>(params)...) };
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
|
@ -190,7 +190,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts recording again.
|
||||
* Clear function clears all operations currently recorded and starts
|
||||
* recording again.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
|
|
@ -217,7 +218,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
|
|||
*/
|
||||
bool isRecording();
|
||||
|
||||
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
|
|
@ -16,161 +16,162 @@ namespace kp {
|
|||
// Has been adopted from:
|
||||
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
|
||||
const TBuiltInResource defaultResource = {
|
||||
/* .MaxLights = */ 0,
|
||||
/* .MaxClipPlanes = */ 0,
|
||||
/* .MaxTextureUnits = */ 0,
|
||||
/* .MaxTextureCoords = */ 0,
|
||||
/* .MaxVertexAttribs = */ 64,
|
||||
/* .MaxVertexUniformComponents = */ 4096,
|
||||
/* .MaxVaryingFloats = */ 64,
|
||||
/* .MaxVertexTextureImageUnits = */ 0,
|
||||
/* .MaxCombinedTextureImageUnits = */ 0,
|
||||
/* .MaxTextureImageUnits = */ 0,
|
||||
/* .MaxFragmentUniformComponents = */ 0,
|
||||
/* .MaxDrawBuffers = */ 0,
|
||||
/* .MaxVertexUniformVectors = */ 128,
|
||||
/* .MaxVaryingVectors = */ 8,
|
||||
/* .MaxFragmentUniformVectors = */ 0,
|
||||
/* .MaxVertexOutputVectors = */ 16,
|
||||
/* .MaxFragmentInputVectors = */ 0,
|
||||
/* .MinProgramTexelOffset = */ -8,
|
||||
/* .MaxProgramTexelOffset = */ 7,
|
||||
/* .MaxClipDistances = */ 8,
|
||||
/* .MaxComputeWorkGroupCountX = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountY = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountZ = */ 65535,
|
||||
/* .MaxComputeWorkGroupSizeX = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeY = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeZ = */ 64,
|
||||
/* .MaxComputeUniformComponents = */ 1024,
|
||||
/* .MaxComputeTextureImageUnits = */ 16,
|
||||
/* .MaxComputeImageUniforms = */ 8,
|
||||
/* .MaxComputeAtomicCounters = */ 8,
|
||||
/* .MaxComputeAtomicCounterBuffers = */ 1,
|
||||
/* .MaxVaryingComponents = */ 60,
|
||||
/* .MaxVertexOutputComponents = */ 64,
|
||||
/* .MaxGeometryInputComponents = */ 64,
|
||||
/* .MaxGeometryOutputComponents = */ 128,
|
||||
/* .MaxFragmentInputComponents = */ 0,
|
||||
/* .MaxImageUnits = */ 0,
|
||||
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
|
||||
/* .MaxCombinedShaderOutputResources = */ 8,
|
||||
/* .MaxImageSamples = */ 0,
|
||||
/* .MaxVertexImageUniforms = */ 0,
|
||||
/* .MaxTessControlImageUniforms = */ 0,
|
||||
/* .MaxTessEvaluationImageUniforms = */ 0,
|
||||
/* .MaxGeometryImageUniforms = */ 0,
|
||||
/* .MaxFragmentImageUniforms = */ 0,
|
||||
/* .MaxCombinedImageUniforms = */ 0,
|
||||
/* .MaxGeometryTextureImageUnits = */ 0,
|
||||
/* .MaxGeometryOutputVertices = */ 256,
|
||||
/* .MaxGeometryTotalOutputComponents = */ 1024,
|
||||
/* .MaxGeometryUniformComponents = */ 1024,
|
||||
/* .MaxGeometryVaryingComponents = */ 64,
|
||||
/* .MaxTessControlInputComponents = */ 128,
|
||||
/* .MaxTessControlOutputComponents = */ 128,
|
||||
/* .MaxTessControlTextureImageUnits = */ 0,
|
||||
/* .MaxTessControlUniformComponents = */ 1024,
|
||||
/* .MaxTessControlTotalOutputComponents = */ 4096,
|
||||
/* .MaxTessEvaluationInputComponents = */ 128,
|
||||
/* .MaxTessEvaluationOutputComponents = */ 128,
|
||||
/* .MaxTessEvaluationTextureImageUnits = */ 16,
|
||||
/* .MaxTessEvaluationUniformComponents = */ 1024,
|
||||
/* .MaxTessPatchComponents = */ 120,
|
||||
/* .MaxPatchVertices = */ 32,
|
||||
/* .MaxTessGenLevel = */ 64,
|
||||
/* .MaxViewports = */ 16,
|
||||
/* .MaxVertexAtomicCounters = */ 0,
|
||||
/* .MaxTessControlAtomicCounters = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounters = */ 0,
|
||||
/* .MaxGeometryAtomicCounters = */ 0,
|
||||
/* .MaxFragmentAtomicCounters = */ 0,
|
||||
/* .MaxCombinedAtomicCounters = */ 8,
|
||||
/* .MaxAtomicCounterBindings = */ 1,
|
||||
/* .MaxVertexAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessControlAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
|
||||
/* .MaxGeometryAtomicCounterBuffers = */ 0,
|
||||
/* .MaxFragmentAtomicCounterBuffers = */ 0,
|
||||
/* .MaxCombinedAtomicCounterBuffers = */ 1,
|
||||
/* .MaxAtomicCounterBufferSize = */ 16384,
|
||||
/* .MaxTransformFeedbackBuffers = */ 4,
|
||||
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
|
||||
/* .MaxCullDistances = */ 8,
|
||||
/* .MaxCombinedClipAndCullDistances = */ 8,
|
||||
/* .MaxSamples = */ 4,
|
||||
/* .maxMeshOutputVerticesNV = */ 256,
|
||||
/* .maxMeshOutputPrimitivesNV = */ 512,
|
||||
/* .maxMeshWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxMeshWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxTaskWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxMeshViewCountNV = */ 4,
|
||||
/* .maxDualSourceDrawBuffersEXT = */ 1,
|
||||
/* .MaxLights = */ 0,
|
||||
/* .MaxClipPlanes = */ 0,
|
||||
/* .MaxTextureUnits = */ 0,
|
||||
/* .MaxTextureCoords = */ 0,
|
||||
/* .MaxVertexAttribs = */ 64,
|
||||
/* .MaxVertexUniformComponents = */ 4096,
|
||||
/* .MaxVaryingFloats = */ 64,
|
||||
/* .MaxVertexTextureImageUnits = */ 0,
|
||||
/* .MaxCombinedTextureImageUnits = */ 0,
|
||||
/* .MaxTextureImageUnits = */ 0,
|
||||
/* .MaxFragmentUniformComponents = */ 0,
|
||||
/* .MaxDrawBuffers = */ 0,
|
||||
/* .MaxVertexUniformVectors = */ 128,
|
||||
/* .MaxVaryingVectors = */ 8,
|
||||
/* .MaxFragmentUniformVectors = */ 0,
|
||||
/* .MaxVertexOutputVectors = */ 16,
|
||||
/* .MaxFragmentInputVectors = */ 0,
|
||||
/* .MinProgramTexelOffset = */ -8,
|
||||
/* .MaxProgramTexelOffset = */ 7,
|
||||
/* .MaxClipDistances = */ 8,
|
||||
/* .MaxComputeWorkGroupCountX = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountY = */ 65535,
|
||||
/* .MaxComputeWorkGroupCountZ = */ 65535,
|
||||
/* .MaxComputeWorkGroupSizeX = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeY = */ 1024,
|
||||
/* .MaxComputeWorkGroupSizeZ = */ 64,
|
||||
/* .MaxComputeUniformComponents = */ 1024,
|
||||
/* .MaxComputeTextureImageUnits = */ 16,
|
||||
/* .MaxComputeImageUniforms = */ 8,
|
||||
/* .MaxComputeAtomicCounters = */ 8,
|
||||
/* .MaxComputeAtomicCounterBuffers = */ 1,
|
||||
/* .MaxVaryingComponents = */ 60,
|
||||
/* .MaxVertexOutputComponents = */ 64,
|
||||
/* .MaxGeometryInputComponents = */ 64,
|
||||
/* .MaxGeometryOutputComponents = */ 128,
|
||||
/* .MaxFragmentInputComponents = */ 0,
|
||||
/* .MaxImageUnits = */ 0,
|
||||
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
|
||||
/* .MaxCombinedShaderOutputResources = */ 8,
|
||||
/* .MaxImageSamples = */ 0,
|
||||
/* .MaxVertexImageUniforms = */ 0,
|
||||
/* .MaxTessControlImageUniforms = */ 0,
|
||||
/* .MaxTessEvaluationImageUniforms = */ 0,
|
||||
/* .MaxGeometryImageUniforms = */ 0,
|
||||
/* .MaxFragmentImageUniforms = */ 0,
|
||||
/* .MaxCombinedImageUniforms = */ 0,
|
||||
/* .MaxGeometryTextureImageUnits = */ 0,
|
||||
/* .MaxGeometryOutputVertices = */ 256,
|
||||
/* .MaxGeometryTotalOutputComponents = */ 1024,
|
||||
/* .MaxGeometryUniformComponents = */ 1024,
|
||||
/* .MaxGeometryVaryingComponents = */ 64,
|
||||
/* .MaxTessControlInputComponents = */ 128,
|
||||
/* .MaxTessControlOutputComponents = */ 128,
|
||||
/* .MaxTessControlTextureImageUnits = */ 0,
|
||||
/* .MaxTessControlUniformComponents = */ 1024,
|
||||
/* .MaxTessControlTotalOutputComponents = */ 4096,
|
||||
/* .MaxTessEvaluationInputComponents = */ 128,
|
||||
/* .MaxTessEvaluationOutputComponents = */ 128,
|
||||
/* .MaxTessEvaluationTextureImageUnits = */ 16,
|
||||
/* .MaxTessEvaluationUniformComponents = */ 1024,
|
||||
/* .MaxTessPatchComponents = */ 120,
|
||||
/* .MaxPatchVertices = */ 32,
|
||||
/* .MaxTessGenLevel = */ 64,
|
||||
/* .MaxViewports = */ 16,
|
||||
/* .MaxVertexAtomicCounters = */ 0,
|
||||
/* .MaxTessControlAtomicCounters = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounters = */ 0,
|
||||
/* .MaxGeometryAtomicCounters = */ 0,
|
||||
/* .MaxFragmentAtomicCounters = */ 0,
|
||||
/* .MaxCombinedAtomicCounters = */ 8,
|
||||
/* .MaxAtomicCounterBindings = */ 1,
|
||||
/* .MaxVertexAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessControlAtomicCounterBuffers = */ 0,
|
||||
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
|
||||
/* .MaxGeometryAtomicCounterBuffers = */ 0,
|
||||
/* .MaxFragmentAtomicCounterBuffers = */ 0,
|
||||
/* .MaxCombinedAtomicCounterBuffers = */ 1,
|
||||
/* .MaxAtomicCounterBufferSize = */ 16384,
|
||||
/* .MaxTransformFeedbackBuffers = */ 4,
|
||||
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
|
||||
/* .MaxCullDistances = */ 8,
|
||||
/* .MaxCombinedClipAndCullDistances = */ 8,
|
||||
/* .MaxSamples = */ 4,
|
||||
/* .maxMeshOutputVerticesNV = */ 256,
|
||||
/* .maxMeshOutputPrimitivesNV = */ 512,
|
||||
/* .maxMeshWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxMeshWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeX_NV = */ 32,
|
||||
/* .maxTaskWorkGroupSizeY_NV = */ 1,
|
||||
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
|
||||
/* .maxMeshViewCountNV = */ 4,
|
||||
/* .maxDualSourceDrawBuffersEXT = */ 1,
|
||||
|
||||
/* .limits = */
|
||||
{
|
||||
/* .nonInductiveForLoops = */ 1,
|
||||
/* .whileLoops = */ 1,
|
||||
/* .doWhileLoops = */ 1,
|
||||
/* .generalUniformIndexing = */ 1,
|
||||
/* .generalAttributeMatrixVectorIndexing = */ 1,
|
||||
/* .generalVaryingIndexing = */ 1,
|
||||
/* .generalSamplerIndexing = */ 1,
|
||||
/* .generalVariableIndexing = */ 1,
|
||||
/* .generalConstantMatrixVectorIndexing = */ 1,
|
||||
}
|
||||
};
|
||||
|
||||
/* .limits = */ {
|
||||
/* .nonInductiveForLoops = */ 1,
|
||||
/* .whileLoops = */ 1,
|
||||
/* .doWhileLoops = */ 1,
|
||||
/* .generalUniformIndexing = */ 1,
|
||||
/* .generalAttributeMatrixVectorIndexing = */ 1,
|
||||
/* .generalVaryingIndexing = */ 1,
|
||||
/* .generalSamplerIndexing = */ 1,
|
||||
/* .generalVariableIndexing = */ 1,
|
||||
/* .generalConstantMatrixVectorIndexing = */ 1,
|
||||
}};
|
||||
|
||||
/**
|
||||
Shader utility class with functions to compile and process glsl files.
|
||||
*/
|
||||
class Shader {
|
||||
public:
|
||||
class Shader
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Compile multiple sources with optional filenames. Currently this function
|
||||
* uses the glslang C++ interface which is not thread safe so this function
|
||||
* should not be called from multiple threads concurrently. If you have a
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* compilation please open an issue.
|
||||
*
|
||||
* @param sources A list of raw glsl shaders in string format
|
||||
* @param files A list of file names respective to each of the sources
|
||||
* @param entryPoint The function name to use as entry point
|
||||
* @param definitions List of pairs containing key value definitions
|
||||
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
|
||||
* @param resourcesLimit A list that contains the resource limits for the
|
||||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_sources(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string,std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& files = {},
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
|
||||
/**
|
||||
* Compile a single glslang source from string value. Currently this function
|
||||
* uses the glslang C++ interface which is not thread safe so this funciton
|
||||
* should not be called from multiple threads concurrently. If you have a
|
||||
* online shader processing multithreading use-case that can't use offline
|
||||
* compilation please open an issue.
|
||||
* Compile a single glslang source from string value. Currently this
|
||||
* function uses the glslang C++ interface which is not thread safe so this
|
||||
* function should not be called from multiple threads concurrently. If you
|
||||
* have a online shader processing multithreading use-case that can't use
|
||||
* offline compilation please open an issue.
|
||||
*
|
||||
* @param source An individual raw glsl shader in string format
|
||||
* @param entryPoint The function name to use as entry point
|
||||
* @param definitions List of pairs containing key value definitions
|
||||
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
|
||||
* @param resourcesLimit A list that contains the resource limits for the
|
||||
* GLSL compiler
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static std::vector<uint32_t> compile_source(
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string,std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
|
||||
const std::string& source,
|
||||
const std::string& entryPoint = "main",
|
||||
std::vector<std::pair<std::string, std::string>> definitions = {},
|
||||
const TBuiltInResource& resources = defaultResource);
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif // DKOMPUTE_DISABLE_SHADER_UTILS
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Tensor
|
|||
* otherwise there is no need to copy from host memory.
|
||||
*/
|
||||
void rebuild(const std::vector<float>& data,
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
TensorTypes tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
|
|
@ -125,9 +125,8 @@ class Tensor
|
|||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromStagingToDevice(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal device memory to the staging memory
|
||||
|
|
@ -138,9 +137,8 @@ class Tensor
|
|||
* @param createBarrier Whether to create a barrier that ensures the data is
|
||||
* copied before further operations. Default is true.
|
||||
*/
|
||||
void recordCopyFromDeviceToStaging(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
|
||||
bool createBarrier);
|
||||
|
||||
/**
|
||||
* Records the buffer memory barrier into the command buffer which
|
||||
|
|
@ -152,12 +150,11 @@ class Tensor
|
|||
* @param srcStageMask Pipeline stage flags for source stage mask
|
||||
* @param dstStageMask Pipeline stage flags for destination stage mask
|
||||
*/
|
||||
void recordBufferMemoryBarrier(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
|
||||
/**
|
||||
* Constructs a vulkan descriptor buffer info which can be used to specify
|
||||
|
|
@ -205,11 +202,11 @@ class Tensor
|
|||
std::shared_ptr<vk::DeviceMemory> memory,
|
||||
vk::MemoryPropertyFlags memoryPropertyFlags);
|
||||
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier);
|
||||
std::shared_ptr<vk::Buffer> bufferFrom,
|
||||
std::shared_ptr<vk::Buffer> bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion,
|
||||
bool createBarrier);
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue