Updated to enable OpMult to work

This commit is contained in:
Alejandro Saucedo 2021-02-26 18:58:19 +00:00
parent fb617d1722
commit 3304767f2c
11 changed files with 49 additions and 39 deletions

View file

@ -1686,7 +1686,7 @@ class OpAlgoDispatch : public OpBase
{
public:
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoInit = false);
/**
* Default destructor, which is in charge of destroying the algorithm
@ -1745,7 +1745,7 @@ class OpMult : public OpAlgoDispatch
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
: OpAlgoDispatch(algorithm)
: OpAlgoDispatch(algorithm, true)
{
KP_LOG_DEBUG("Kompute OpMult constructor with params");

View file

@ -15,7 +15,14 @@ Algorithm::Algorithm(
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
this->mDevice = device;
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
if (tensors.size() && spirv.size()) {
KP_LOG_INFO("Kompute Algorithm initialising with tensor size: {} and spirv size: {}", tensors.size(), spirv.size());
this->rebuild(tensors, spirv, workgroup, specializationConstants, pushConstants);
}
else {
KP_LOG_INFO("Kompute Algorithm constructor with empty tensors and or spirv so not rebuilding vulkan components");
}
}
Algorithm::~Algorithm()
@ -39,10 +46,10 @@ Algorithm::rebuild(
this->mSpirv = spirv;
this->mSpecializationConstants = specializationConstants;
this->mPushConstants = pushConstants;
this->setWorkgroup(workgroup);
this->setWorkgroup(workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
// Descriptor pool is created first so if available then destroy all before rebuild
if (this->mFreeDescriptorPool) {
if (this->isInit()) {
this->destroy();
}
@ -340,20 +347,6 @@ Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
{
KP_LOG_DEBUG("Kompute Algorithm calling record dispatch");
if(this->mPipelineCache) {
KP_LOG_WARN("Value valid");
}
else {
KP_LOG_WARN("NOT Value valid");
}
if(this->mPipeline) {
KP_LOG_WARN("Value valid");
}
else {
KP_LOG_WARN("NOT Value valid");
}
commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute,
*this->mPipeline);
@ -376,10 +369,7 @@ Algorithm::recordDispatch(std::shared_ptr<vk::CommandBuffer> commandBuffer)
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size X: {}, Y: {}, Z: {}",
this->mWorkgroup[0],
this->mWorkgroup[1],
this->mWorkgroup[2]);
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
@ -394,6 +384,11 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) {
} else {
this->mWorkgroup = { minSize, 1, 1 };
}
KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
this->mWorkgroup[0],
this->mWorkgroup[1],
this->mWorkgroup[2]);
}
const Workgroup&

View file

@ -33,7 +33,7 @@ Manager::Manager()
Manager::Manager(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices)
{
this->mManageResources = false;
this->mManageResources = true;
this->createInstance();
this->createDevice(familyQueueIndices, physicalDeviceIndex);
@ -43,7 +43,7 @@ Manager::Manager(std::shared_ptr<vk::Instance> instance,
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device)
{
this->mManageResources = true;
this->mManageResources = false;
this->mInstance = instance;
this->mPhysicalDevice = physicalDevice;

View file

@ -4,10 +4,14 @@
namespace kp {
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm)
/**
 * Builds a dispatch operation around an existing algorithm.
 *
 * @param algorithm Algorithm to store in this operation.
 * @param skipAlgoCheck When true the initialisation check is bypassed and the
 * algorithm is stored as provided.
 * @throws std::runtime_error if the check is enabled and the algorithm is
 * null or reports that it is not initialised.
 */
OpAlgoDispatch::OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoCheck)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");

    // Test for an empty pointer before calling isInit() so that a null
    // algorithm raises the same error instead of being dereferenced.
    if (!skipAlgoCheck && (!algorithm || !algorithm->isInit())) {
        throw std::runtime_error("Kompute OpAlgoDispatch constructor with non initialised algorithm");
    }

    this->mAlgorithm = algorithm;
}

View file

@ -19,6 +19,8 @@ OpTensorSyncDevice::OpTensorSyncDevice(
/**
 * Destructor: emits a debug log entry and then calls clear() on mTensors.
 */
OpTensorSyncDevice::~OpTensorSyncDevice()
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
// NOTE(review): presumably mTensors holds the tensor handles passed at
// construction, so clearing it drops this operation's references - confirm
// against the class declaration.
this->mTensors.clear();
}
void

View file

@ -78,8 +78,7 @@ Sequence::eval()
std::shared_ptr<Sequence>
Sequence::eval(std::shared_ptr<OpBase> op) {
this->clear();
this->record(op);
this->eval();
return this->record(op)->eval();
}
std::shared_ptr<Sequence>
@ -88,6 +87,7 @@ Sequence::evalAsync()
if (this->isRecording()) {
this->end();
}
if (this->mIsRunning) {
throw std::runtime_error("Kompute Sequence evalAsync called when an eval async was "
"called without successful wait");

View file

@ -17,7 +17,7 @@ class OpAlgoDispatch : public OpBase
{
public:
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm);
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm, bool skipAlgoInit = false);
/**
* Default destructor, which is in charge of destroying the algorithm

View file

@ -33,7 +33,7 @@ class OpMult : public OpAlgoDispatch
* @param komputeWorkgroup Optional parameter to specify the layout for processing
*/
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
: OpAlgoDispatch(algorithm)
: OpAlgoDispatch(algorithm, true)
{
KP_LOG_DEBUG("Kompute OpMult constructor with params");

View file

@ -80,8 +80,11 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;
std::vector<std::shared_ptr<kp::Algorithm>> algosAsync;
for (uint32_t i = 0; i < numParallel; i++) {
inputsAsyncB.push_back(mgr.tensor(data));
algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv));
}
std::vector<std::shared_ptr<kp::Sequence>> sqs;
@ -93,7 +96,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
auto startAsync = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < numParallel; i++) {
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algorithms[i]);
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algosAsync[i]);
}
for (uint32_t i = 0; i < numParallel; i++) {
@ -157,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA});
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB});
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA}, spirv);
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB}, spirv);
sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
sq2->evalAsync<kp::OpAlgoDispatch>(algo2);

View file

@ -74,7 +74,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
mgr.sequence()
->record<kp::OpTensorSyncDevice>(algo->getTensors())
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncDevice>(algo->getTensors())
->record<kp::OpTensorSyncLocal>(algo->getTensors())
->eval();
tensorA->destroy();
@ -111,12 +111,14 @@ TEST(TestDestroy, TestDestroySequenceSingle)
tensorA = mgr.tensor({0, 0, 0});
mgr.sequence()
sq = mgr.sequence()
->record<kp::OpTensorSyncDevice>({tensorA})
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
->record<kp::OpTensorSyncLocal>({tensorA})
->eval();
sq->destroy();
EXPECT_FALSE(sq->isInit());
}
}

View file

@ -32,13 +32,15 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv,
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv +
kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algorithm =
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0}));
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
@ -104,6 +106,8 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
wIn, wOutI, wOutJ,
bIn, bOut, lOut };
mgr.sequence()->record<kp::OpTensorSyncDevice>(params)->eval();
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +