Updated to current multiple queue implementation
This commit is contained in:
parent
3e5364fc44
commit
4e697bb787
4 changed files with 151 additions and 79 deletions
|
|
@ -1253,19 +1253,22 @@ class Manager
|
|||
Manager();
|
||||
|
||||
/**
|
||||
Similar to base constructor but allows the user to provide the device
|
||||
they would like to create the resources on.
|
||||
* Similar to base constructor but allows the user to provide the device
|
||||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
*/
|
||||
Manager(uint32_t physicalDeviceIndex);
|
||||
Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues = 1);
|
||||
|
||||
/**
|
||||
* Manager constructor which allows your own vulkan application to integrate
|
||||
* with the vulkan kompute use.
|
||||
*
|
||||
* @param instance Vulkan compute instance to base this application
|
||||
* @physicalDevice Vulkan physical device to use for application
|
||||
* @device Vulkan logical device to use for all base resources
|
||||
* @physicalDeviceIndex Index for vulkan physical device used
|
||||
* @param physicalDevice Vulkan physical device to use for application
|
||||
* @param device Vulkan logical device to use for all base resources
|
||||
* @param physicalDeviceIndex Index for vulkan physical device used
|
||||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
|
|
@ -1290,6 +1293,16 @@ class Manager
|
|||
std::weak_ptr<Sequence> getOrCreateManagedSequence(
|
||||
std::string sequenceName);
|
||||
|
||||
/**
|
||||
* Create a new managed Kompute sequence so it's available within the manager.
|
||||
*
|
||||
* @param sequenceName The name for the named sequence to be created
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Weak pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::weak_ptr<Sequence> createManagedSequence(
|
||||
std::string sequenceName, uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Operation that adds extra operations to existing or new created
|
||||
* sequences.
|
||||
|
|
@ -1342,7 +1355,12 @@ class Manager
|
|||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq = found->second;
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -1355,6 +1373,9 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
|
||||
sq->evalAsync();
|
||||
}
|
||||
else {
|
||||
SPDLOG_ERROR("Kompute Manager evalOpAsync sequence [{}] not found", sequenceName);
|
||||
}
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
|
||||
}
|
||||
|
||||
|
|
@ -1382,7 +1403,7 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS");
|
||||
}
|
||||
else {
|
||||
SPDLOG_ERROR("Sequence not found");
|
||||
SPDLOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1437,7 +1458,7 @@ class Manager
|
|||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
uint32_t mComputeQueueFamilyIndex = -1;
|
||||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
|
|
@ -1452,7 +1473,7 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice();
|
||||
void createDevice(uint32_t totalComputeQueues);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -25,15 +25,15 @@ debugMessageCallback(VkDebugReportFlagsEXT flags,
|
|||
#endif
|
||||
|
||||
Manager::Manager()
|
||||
: Manager(0)
|
||||
: Manager(0, 1)
|
||||
{}
|
||||
|
||||
Manager::Manager(uint32_t physicalDeviceIndex)
|
||||
Manager::Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues)
|
||||
{
|
||||
this->mPhysicalDeviceIndex = physicalDeviceIndex;
|
||||
|
||||
this->createInstance();
|
||||
this->createDevice();
|
||||
this->createDevice(totalComputeQueues);
|
||||
}
|
||||
|
||||
Manager::Manager(std::shared_ptr<vk::Instance> instance,
|
||||
|
|
@ -98,19 +98,27 @@ Manager::getOrCreateManagedSequence(std::string sequenceName)
|
|||
this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueue,
|
||||
this->mComputeQueueFamilyIndex);
|
||||
sq->init();
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
return sq;
|
||||
return this->createManagedSequence(sequenceName);
|
||||
} else {
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
||||
std::weak_ptr<Sequence>
|
||||
Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) {
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} and queueIndex: {}", sequenceName, queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq =
|
||||
std::make_shared<Sequence>(this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndex);
|
||||
sq->init();
|
||||
this->mManagedSequences.insert({ sequenceName, sq });
|
||||
return sq;
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createInstance()
|
||||
{
|
||||
|
|
@ -197,7 +205,7 @@ Manager::createInstance()
|
|||
}
|
||||
|
||||
void
|
||||
Manager::createDevice()
|
||||
Manager::createDevice(uint32_t totalComputeQueues)
|
||||
{
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager creating Device");
|
||||
|
|
@ -248,7 +256,7 @@ Manager::createDevice()
|
|||
}
|
||||
|
||||
const float defaultQueuePriority(0.0f);
|
||||
const uint32_t defaultQueueCount(1);
|
||||
const uint32_t defaultQueueCount(totalComputeQueues);
|
||||
vk::DeviceQueueCreateInfo deviceQueueCreateInfo(
|
||||
vk::DeviceQueueCreateFlags(),
|
||||
this->mComputeQueueFamilyIndex,
|
||||
|
|
@ -264,9 +272,16 @@ Manager::createDevice()
|
|||
&deviceCreateInfo, nullptr, this->mDevice.get());
|
||||
SPDLOG_DEBUG("Kompute Manager device created");
|
||||
|
||||
this->mComputeQueue = std::make_shared<vk::Queue>();
|
||||
this->mDevice->getQueue(
|
||||
this->mComputeQueueFamilyIndex, 0, this->mComputeQueue.get());
|
||||
for (uint32_t i = 0; i < totalComputeQueues; i++)
|
||||
{
|
||||
std::shared_ptr<vk::Queue> currQueue = std::make_shared<vk::Queue>();
|
||||
|
||||
this->mDevice->getQueue(
|
||||
this->mComputeQueueFamilyIndex, i, currQueue.get());
|
||||
|
||||
this->mComputeQueues.push_back(currQueue);
|
||||
}
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager compute queue obtained");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,19 +25,22 @@ class Manager
|
|||
Manager();
|
||||
|
||||
/**
|
||||
Similar to base constructor but allows the user to provide the device
|
||||
they would like to create the resources on.
|
||||
* Similar to base constructor but allows the user to provide the device
|
||||
* they would like to create the resources on.
|
||||
*
|
||||
* @param physicalDeviceIndex The index of the physical device to use
|
||||
* @param totalQueues The total number of compute queues to create.
|
||||
*/
|
||||
Manager(uint32_t physicalDeviceIndex);
|
||||
Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues = 1);
|
||||
|
||||
/**
|
||||
* Manager constructor which allows your own vulkan application to integrate
|
||||
* with the vulkan kompute use.
|
||||
*
|
||||
* @param instance Vulkan compute instance to base this application
|
||||
* @physicalDevice Vulkan physical device to use for application
|
||||
* @device Vulkan logical device to use for all base resources
|
||||
* @physicalDeviceIndex Index for vulkan physical device used
|
||||
* @param physicalDevice Vulkan physical device to use for application
|
||||
* @param device Vulkan logical device to use for all base resources
|
||||
* @param physicalDeviceIndex Index for vulkan physical device used
|
||||
*/
|
||||
Manager(std::shared_ptr<vk::Instance> instance,
|
||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
|
|
@ -62,6 +65,16 @@ class Manager
|
|||
std::weak_ptr<Sequence> getOrCreateManagedSequence(
|
||||
std::string sequenceName);
|
||||
|
||||
/**
|
||||
* Create a new managed Kompute sequence so it's available within the manager.
|
||||
*
|
||||
* @param sequenceName The name for the named sequence to be created
|
||||
* @param queueIndex The queue to use from the available queues
|
||||
* @return Weak pointer to the manager owned sequence resource
|
||||
*/
|
||||
std::weak_ptr<Sequence> createManagedSequence(
|
||||
std::string sequenceName, uint32_t queueIndex = 0);
|
||||
|
||||
/**
|
||||
* Operation that adds extra operations to existing or new created
|
||||
* sequences.
|
||||
|
|
@ -114,7 +127,12 @@ class Manager
|
|||
std::weak_ptr<Sequence> sqWeakPtr =
|
||||
this->getOrCreateManagedSequence(sequenceName);
|
||||
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
|
||||
this->mManagedSequences.find(sequenceName);
|
||||
|
||||
if (found == this->mManagedSequences.end()) {
|
||||
std::shared_ptr<Sequence> sq = found->second;
|
||||
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
|
||||
sq->begin();
|
||||
|
||||
|
|
@ -127,6 +145,9 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
|
||||
sq->evalAsync();
|
||||
}
|
||||
else {
|
||||
SPDLOG_ERROR("Kompute Manager evalOpAsync sequence [{}] not found", sequenceName);
|
||||
}
|
||||
SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
|
||||
}
|
||||
|
||||
|
|
@ -154,7 +175,7 @@ class Manager
|
|||
SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS");
|
||||
}
|
||||
else {
|
||||
SPDLOG_ERROR("Sequence not found");
|
||||
SPDLOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -209,7 +230,7 @@ class Manager
|
|||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
bool mFreeDevice = false;
|
||||
uint32_t mComputeQueueFamilyIndex = -1;
|
||||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::unordered_map<std::string, std::shared_ptr<Sequence>>
|
||||
|
|
@ -224,7 +245,7 @@ class Manager
|
|||
|
||||
// Create functions
|
||||
void createInstance();
|
||||
void createDevice();
|
||||
void createDevice(uint32_t totalComputeQueues);
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -9,21 +9,6 @@ TEST(TestAsyncOperations, TestManagerAsync)
|
|||
{
|
||||
uint32_t size = 100000;
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 100000);
|
||||
std::vector<float> resultAsync(size, 200000);
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorD{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorE{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorF{ new kp::Tensor(data) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC, tensorD, tensorE, tensorF });
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
||||
|
|
@ -44,52 +29,82 @@ TEST(TestAsyncOperations, TestManagerAsync)
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 100000);
|
||||
std::vector<float> resultAsync(size, 100000);
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorSyncA{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorSyncB{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorSyncC{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorSyncD{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorSyncE{ new kp::Tensor(data) };
|
||||
std::shared_ptr<kp::Tensor> tensorSyncF{ new kp::Tensor(data) };
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorSyncA, tensorSyncB, tensorSyncC, tensorSyncD, tensorSyncE, tensorSyncF });
|
||||
|
||||
auto startSync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB }, std::vector<char>(shader.begin(), shader.end()));
|
||||
{ tensorSyncA, tensorSyncB }, std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorC, tensorD }, std::vector<char>(shader.begin(), shader.end()));
|
||||
{ tensorSyncC, tensorSyncD }, std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorE, tensorF }, std::vector<char>(shader.begin(), shader.end()));
|
||||
{ tensorSyncE, tensorSyncF }, std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorSyncB, tensorSyncD, tensorSyncF });
|
||||
|
||||
auto endSync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });
|
||||
|
||||
EXPECT_EQ(tensorB->data(), resultSync);
|
||||
EXPECT_EQ(tensorD->data(), resultSync);
|
||||
EXPECT_EQ(tensorF->data(), resultSync);
|
||||
|
||||
auto durationSync = std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync).count();
|
||||
|
||||
auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
EXPECT_EQ(tensorSyncB->data(), resultSync);
|
||||
EXPECT_EQ(tensorSyncD->data(), resultSync);
|
||||
EXPECT_EQ(tensorSyncF->data(), resultSync);
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB }, "asyncOne", std::vector<char>(shader.begin(), shader.end()));
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncA{ new kp::Tensor(data) };
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncB{ new kp::Tensor(data) };
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncC{ new kp::Tensor(data) };
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncD{ new kp::Tensor(data) };
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncE{ new kp::Tensor(data) };
|
||||
//std::shared_ptr<kp::Tensor> tensorAsyncF{ new kp::Tensor(data) };
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
{ tensorC, tensorD }, "asyncTwo", std::vector<char>(shader.begin(), shader.end()));
|
||||
//kp::Manager mgrAsync(0, 1);
|
||||
|
||||
mgr.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
{ tensorE, tensorF }, "asyncThree", std::vector<char>(shader.begin(), shader.end()));
|
||||
//mgrAsync.evalOpDefault<kp::OpTensorCreate>({ tensorAsyncA, tensorAsyncB, tensorAsyncC, tensorAsyncD, tensorAsyncE, tensorAsyncF });
|
||||
|
||||
mgr.evalOpAwait("asyncOne");
|
||||
mgr.evalOpAwait("asyncTwo");
|
||||
mgr.evalOpAwait("asyncThree");
|
||||
//mgrAsync.createManagedSequence("async0", 0);
|
||||
////mgrAsync.createManagedSequence("async1", 1);
|
||||
////mgrAsync.createManagedSequence("async2", 2);
|
||||
|
||||
auto endAsync = std::chrono::high_resolution_clock::now();
|
||||
//auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();
|
||||
//mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
// { tensorAsyncA, tensorAsyncB }, "async0", std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });
|
||||
////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
//// { tensorAsyncC, tensorAsyncD }, "async1", std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
EXPECT_EQ(tensorB->data(), resultAsync);
|
||||
EXPECT_EQ(tensorD->data(), resultAsync);
|
||||
EXPECT_EQ(tensorF->data(), resultAsync);
|
||||
////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
|
||||
//// { tensorAsyncE, tensorAsyncF }, "async2", std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
SPDLOG_DEBUG("Total Sync: {}", durationSync);
|
||||
SPDLOG_DEBUG("Total Async: {}", durationAsync);
|
||||
//mgrAsync.evalOpAwait("async0");
|
||||
////mgrAsync.evalOpAwait("async1");
|
||||
////mgrAsync.evalOpAwait("async2");
|
||||
|
||||
//mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncB });
|
||||
////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncD });
|
||||
////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncF });
|
||||
|
||||
//auto endAsync = std::chrono::high_resolution_clock::now();
|
||||
//auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();
|
||||
|
||||
//EXPECT_EQ(tensorAsyncB->data(), resultAsync);
|
||||
////EXPECT_EQ(tensorAsyncD->data(), resultAsync);
|
||||
////EXPECT_EQ(tensorAsyncF->data(), resultAsync);
|
||||
|
||||
////SPDLOG_DEBUG("Total Sync: {}", durationSync);
|
||||
//SPDLOG_DEBUG("Total Async: {}", durationAsync);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue