Updated to current multiple queue implementation

2020-10-15 21:40:31 +01:00 · 2020-10-15 21:40:31 +01:00 · 4e697bb787
commit 4e697bb787
parent 3e5364fc44
4 changed files with 151 additions and 79 deletions
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@ -1253,19 +1253,22 @@ class Manager
    Manager();

    /**
-        Similar to base constructor but allows the user to provide the device
-       they would like to create the resources on.
+      * Similar to base constructor but allows the user to provide the device
+      * they would like to create the resources on.
+      *
+      * @param physicalDeviceIndex The index of the physical device to use
+      * @param totalQueues The total number of compute queues to create.
    */
-    Manager(uint32_t physicalDeviceIndex);
+    Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues = 1);

    /**
     * Manager constructor which allows your own vulkan application to integrate
     * with the vulkan kompute use.
     *
     * @param instance Vulkan compute instance to base this application
-     * @physicalDevice Vulkan physical device to use for application
-     * @device Vulkan logical device to use for all base resources
-     * @physicalDeviceIndex Index for vulkan physical device used
+     * @param physicalDevice Vulkan physical device to use for application
+     * @param device Vulkan logical device to use for all base resources
+     * @param physicalDeviceIndex Index for vulkan physical device used
     */
    Manager(std::shared_ptr<vk::Instance> instance,
            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
@ -1290,6 +1293,16 @@ class Manager
    std::weak_ptr<Sequence> getOrCreateManagedSequence(
      std::string sequenceName);

+    /**
+     * Create a new managed Kompute sequence so it's available within the manager.
+     *
+     * @param sequenceName The name for the named sequence to be created
+     * @param queueIndex The queue to use from the available queues
+     * @return Weak pointer to the manager owned sequence resource
+     */
+    std::weak_ptr<Sequence> createManagedSequence(
+      std::string sequenceName, uint32_t queueIndex = 0);
+
    /**
     * Operation that adds extra operations to existing or new created
     * sequences.
@ -1342,7 +1355,12 @@ class Manager
        std::weak_ptr<Sequence> sqWeakPtr =
          this->getOrCreateManagedSequence(sequenceName);

-        if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
+        std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
+          this->mManagedSequences.find(sequenceName);
+
+        if (found == this->mManagedSequences.end()) {
+            std::shared_ptr<Sequence> sq = found->second;
+
            SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
            sq->begin();

@ -1355,6 +1373,9 @@ class Manager
            SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
            sq->evalAsync();
        }
+        else {
+            SPDLOG_ERROR("Kompute Manager evalOpAsync sequence [{}] not found", sequenceName);
+        }
        SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
    }

@ -1382,7 +1403,7 @@ class Manager
            SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS");
        }
        else {
-            SPDLOG_ERROR("Sequence not found");
+            SPDLOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
 		}
    }

@ -1437,7 +1458,7 @@ class Manager
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;
    uint32_t mComputeQueueFamilyIndex = -1;
-    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
+    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

    // -------------- ALWAYS OWNED RESOURCES
    std::unordered_map<std::string, std::shared_ptr<Sequence>>
@ -1452,7 +1473,7 @@ class Manager

    // Create functions
    void createInstance();
-    void createDevice();
+    void createDevice(uint32_t totalComputeQueues);
 };

 } // End namespace kp
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@ -25,15 +25,15 @@ debugMessageCallback(VkDebugReportFlagsEXT flags,
 #endif

 Manager::Manager()
-  : Manager(0)
+  : Manager(0, 1)
 {}

-Manager::Manager(uint32_t physicalDeviceIndex)
+Manager::Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues)
 {
    this->mPhysicalDeviceIndex = physicalDeviceIndex;

    this->createInstance();
-    this->createDevice();
+    this->createDevice(totalComputeQueues);
 }

 Manager::Manager(std::shared_ptr<vk::Instance> instance,
@ -98,19 +98,27 @@ Manager::getOrCreateManagedSequence(std::string sequenceName)
      this->mManagedSequences.find(sequenceName);

    if (found == this->mManagedSequences.end()) {
-        std::shared_ptr<Sequence> sq =
-          std::make_shared<Sequence>(this->mPhysicalDevice,
-                                     this->mDevice,
-                                     this->mComputeQueue,
-                                     this->mComputeQueueFamilyIndex);
-        sq->init();
-        this->mManagedSequences.insert({ sequenceName, sq });
-        return sq;
+        return this->createManagedSequence(sequenceName);
    } else {
        return found->second;
    }
 }

+std::weak_ptr<Sequence>
+Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) {
+
+    SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} and queueIndex: {}", sequenceName, queueIndex);
+
+    std::shared_ptr<Sequence> sq =
+      std::make_shared<Sequence>(this->mPhysicalDevice,
+                                 this->mDevice,
+                                 this->mComputeQueues[queueIndex],
+                                 this->mComputeQueueFamilyIndex);
+    sq->init();
+    this->mManagedSequences.insert({ sequenceName, sq });
+    return sq;
+}
+
 void
 Manager::createInstance()
 {
@ -197,7 +205,7 @@ Manager::createInstance()
 }

 void
-Manager::createDevice()
+Manager::createDevice(uint32_t totalComputeQueues)
 {

    SPDLOG_DEBUG("Kompute Manager creating Device");
@ -248,7 +256,7 @@ Manager::createDevice()
    }

    const float defaultQueuePriority(0.0f);
-    const uint32_t defaultQueueCount(1);
+    const uint32_t defaultQueueCount(totalComputeQueues);
    vk::DeviceQueueCreateInfo deviceQueueCreateInfo(
      vk::DeviceQueueCreateFlags(),
      this->mComputeQueueFamilyIndex,
@ -264,9 +272,16 @@ Manager::createDevice()
      &deviceCreateInfo, nullptr, this->mDevice.get());
    SPDLOG_DEBUG("Kompute Manager device created");

-    this->mComputeQueue = std::make_shared<vk::Queue>();
-    this->mDevice->getQueue(
-      this->mComputeQueueFamilyIndex, 0, this->mComputeQueue.get());
+    for (uint32_t i = 0; i < totalComputeQueues; i++) 
+    {
+        std::shared_ptr<vk::Queue> currQueue = std::make_shared<vk::Queue>();
+
+        this->mDevice->getQueue(
+          this->mComputeQueueFamilyIndex, i, currQueue.get());
+
+        this->mComputeQueues.push_back(currQueue);
+    }
+
    SPDLOG_DEBUG("Kompute Manager compute queue obtained");
 }

--- a/src/include/kompute/Manager.hpp
+++ b/src/include/kompute/Manager.hpp
@ -25,19 +25,22 @@ class Manager
    Manager();

    /**
-        Similar to base constructor but allows the user to provide the device
-       they would like to create the resources on.
+      * Similar to base constructor but allows the user to provide the device
+      * they would like to create the resources on.
+      *
+      * @param physicalDeviceIndex The index of the physical device to use
+      * @param totalQueues The total number of compute queues to create.
    */
-    Manager(uint32_t physicalDeviceIndex);
+    Manager(uint32_t physicalDeviceIndex, uint32_t totalComputeQueues = 1);

    /**
     * Manager constructor which allows your own vulkan application to integrate
     * with the vulkan kompute use.
     *
     * @param instance Vulkan compute instance to base this application
-     * @physicalDevice Vulkan physical device to use for application
-     * @device Vulkan logical device to use for all base resources
-     * @physicalDeviceIndex Index for vulkan physical device used
+     * @param physicalDevice Vulkan physical device to use for application
+     * @param device Vulkan logical device to use for all base resources
+     * @param physicalDeviceIndex Index for vulkan physical device used
     */
    Manager(std::shared_ptr<vk::Instance> instance,
            std::shared_ptr<vk::PhysicalDevice> physicalDevice,
@ -62,6 +65,16 @@ class Manager
    std::weak_ptr<Sequence> getOrCreateManagedSequence(
      std::string sequenceName);

+    /**
+     * Create a new managed Kompute sequence so it's available within the manager.
+     *
+     * @param sequenceName The name for the named sequence to be created
+     * @param queueIndex The queue to use from the available queues
+     * @return Weak pointer to the manager owned sequence resource
+     */
+    std::weak_ptr<Sequence> createManagedSequence(
+      std::string sequenceName, uint32_t queueIndex = 0);
+
    /**
     * Operation that adds extra operations to existing or new created
     * sequences.
@ -114,7 +127,12 @@ class Manager
        std::weak_ptr<Sequence> sqWeakPtr =
          this->getOrCreateManagedSequence(sequenceName);

-        if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
+        std::unordered_map<std::string, std::shared_ptr<Sequence>>::iterator found =
+          this->mManagedSequences.find(sequenceName);
+
+        if (found == this->mManagedSequences.end()) {
+            std::shared_ptr<Sequence> sq = found->second;
+
            SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN");
            sq->begin();

@ -127,6 +145,9 @@ class Manager
            SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence EVAL");
            sq->evalAsync();
        }
+        else {
+            SPDLOG_ERROR("Kompute Manager evalOpAsync sequence [{}] not found", sequenceName);
+        }
        SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence SUCCESS");
    }

@ -154,7 +175,7 @@ class Manager
            SPDLOG_DEBUG("Kompute Manager evalOpAwait running sequence SUCCESS");
        }
        else {
-            SPDLOG_ERROR("Sequence not found");
+            SPDLOG_ERROR("Kompute Manager evalOpAwait Sequence not found");
 		}
    }

@ -209,7 +230,7 @@ class Manager
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;
    uint32_t mComputeQueueFamilyIndex = -1;
-    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
+    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

    // -------------- ALWAYS OWNED RESOURCES
    std::unordered_map<std::string, std::shared_ptr<Sequence>>
@ -224,7 +245,7 @@ class Manager

    // Create functions
    void createInstance();
-    void createDevice();
+    void createDevice(uint32_t totalComputeQueues);
 };

 } // End namespace kp
--- a/test/TestAsyncOperations.cpp
+++ b/test/TestAsyncOperations.cpp
@ -9,21 +9,6 @@ TEST(TestAsyncOperations, TestManagerAsync)
 {
    uint32_t size = 100000;

-    std::vector<float> data(size, 0.0);
-    std::vector<float> resultSync(size, 100000);
-    std::vector<float> resultAsync(size, 200000);
-
-    std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorC{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorD{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorE{ new kp::Tensor(data) };
-    std::shared_ptr<kp::Tensor> tensorF{ new kp::Tensor(data) };
-
-    kp::Manager mgr;
-
-    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB, tensorC, tensorD, tensorE, tensorF });
-
    std::string shader(R"(
        #version 450

@ -44,52 +29,82 @@ TEST(TestAsyncOperations, TestManagerAsync)
        }
    )");

+    std::vector<float> data(size, 0.0);
+    std::vector<float> resultSync(size, 100000);
+    std::vector<float> resultAsync(size, 100000);
+
+    std::shared_ptr<kp::Tensor> tensorSyncA{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorSyncB{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorSyncC{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorSyncD{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorSyncE{ new kp::Tensor(data) };
+    std::shared_ptr<kp::Tensor> tensorSyncF{ new kp::Tensor(data) };
+
+    kp::Manager mgr;
+
+    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorSyncA, tensorSyncB, tensorSyncC, tensorSyncD, tensorSyncE, tensorSyncF });
+
    auto startSync = std::chrono::high_resolution_clock::now();

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
-      { tensorA, tensorB }, std::vector<char>(shader.begin(), shader.end()));
+      { tensorSyncA, tensorSyncB }, std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
-      { tensorC, tensorD }, std::vector<char>(shader.begin(), shader.end()));
+      { tensorSyncC, tensorSyncD }, std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
-      { tensorE, tensorF }, std::vector<char>(shader.begin(), shader.end()));
+      { tensorSyncE, tensorSyncF }, std::vector<char>(shader.begin(), shader.end()));
+
+    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorSyncB, tensorSyncD, tensorSyncF });

    auto endSync = std::chrono::high_resolution_clock::now();
-
-    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });
-
-    EXPECT_EQ(tensorB->data(), resultSync);
-    EXPECT_EQ(tensorD->data(), resultSync);
-    EXPECT_EQ(tensorF->data(), resultSync);
-
    auto durationSync = std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync).count();

-    auto startAsync = std::chrono::high_resolution_clock::now();
+    EXPECT_EQ(tensorSyncB->data(), resultSync);
+    EXPECT_EQ(tensorSyncD->data(), resultSync);
+    EXPECT_EQ(tensorSyncF->data(), resultSync);

-    mgr.evalOpAsync<kp::OpAlgoBase<>>(
-      { tensorA, tensorB }, "asyncOne", std::vector<char>(shader.begin(), shader.end()));
+    //std::shared_ptr<kp::Tensor> tensorAsyncA{ new kp::Tensor(data) };
+    //std::shared_ptr<kp::Tensor> tensorAsyncB{ new kp::Tensor(data) };
+    //std::shared_ptr<kp::Tensor> tensorAsyncC{ new kp::Tensor(data) };
+    //std::shared_ptr<kp::Tensor> tensorAsyncD{ new kp::Tensor(data) };
+    //std::shared_ptr<kp::Tensor> tensorAsyncE{ new kp::Tensor(data) };
+    //std::shared_ptr<kp::Tensor> tensorAsyncF{ new kp::Tensor(data) };

-    mgr.evalOpAsync<kp::OpAlgoBase<>>(
-      { tensorC, tensorD }, "asyncTwo", std::vector<char>(shader.begin(), shader.end()));
+    //kp::Manager mgrAsync(0, 1);

-    mgr.evalOpAsync<kp::OpAlgoBase<>>(
-      { tensorE, tensorF }, "asyncThree", std::vector<char>(shader.begin(), shader.end()));
+    //mgrAsync.evalOpDefault<kp::OpTensorCreate>({ tensorAsyncA, tensorAsyncB, tensorAsyncC, tensorAsyncD, tensorAsyncE, tensorAsyncF });

-    mgr.evalOpAwait("asyncOne");
-    mgr.evalOpAwait("asyncTwo");
-    mgr.evalOpAwait("asyncThree");
+    //mgrAsync.createManagedSequence("async0", 0);
+    ////mgrAsync.createManagedSequence("async1", 1);
+    ////mgrAsync.createManagedSequence("async2", 2);

-    auto endAsync = std::chrono::high_resolution_clock::now();
+    //auto startAsync = std::chrono::high_resolution_clock::now();

-    auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();
+    //mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
+    //  { tensorAsyncA, tensorAsyncB }, "async0", std::vector<char>(shader.begin(), shader.end()));

-    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });
+    ////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
+    ////  { tensorAsyncC, tensorAsyncD }, "async1", std::vector<char>(shader.begin(), shader.end()));

-    EXPECT_EQ(tensorB->data(), resultAsync);
-    EXPECT_EQ(tensorD->data(), resultAsync);
-    EXPECT_EQ(tensorF->data(), resultAsync);
+    ////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
+    ////  { tensorAsyncE, tensorAsyncF }, "async2", std::vector<char>(shader.begin(), shader.end()));

-    SPDLOG_DEBUG("Total Sync: {}", durationSync);
-    SPDLOG_DEBUG("Total Async: {}", durationAsync);
+    //mgrAsync.evalOpAwait("async0");
+    ////mgrAsync.evalOpAwait("async1");
+    ////mgrAsync.evalOpAwait("async2");
+
+    //mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncB });
+    ////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncD });
+    ////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncF });
+
+    //auto endAsync = std::chrono::high_resolution_clock::now();
+    //auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();
+
+    //EXPECT_EQ(tensorAsyncB->data(), resultAsync);
+    ////EXPECT_EQ(tensorAsyncD->data(), resultAsync);
+    ////EXPECT_EQ(tensorAsyncF->data(), resultAsync);
+
+    ////SPDLOG_DEBUG("Total Sync: {}", durationSync);
+    //SPDLOG_DEBUG("Total Async: {}", durationAsync);
 }