From 1b4e4b6b18c9a78995f74ad09b9b180018df28fd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 6 Sep 2020 12:52:45 +0100 Subject: [PATCH] Added optensorSyncDevice and optensorcopy tests --- single_include/AggregateHeaders.cpp | 2 + single_include/kompute/Kompute.hpp | 116 +++++++++++++- src/OpTensorCopy.cpp | 2 +- src/OpTensorCreate.cpp | 25 +--- src/OpTensorSyncDevice.cpp | 50 +++++-- src/OpTensorSyncLocal.cpp | 97 ++++++++++++ src/include/kompute/operations/OpAlgoBase.hpp | 1 + .../kompute/operations/OpTensorCreate.hpp | 8 +- .../kompute/operations/OpTensorSyncDevice.hpp | 8 +- .../kompute/operations/OpTensorSyncLocal.hpp | 60 ++++++++ test/TestOpTensorCopy.cpp | 141 +++++++++--------- test/TestTensor.cpp | 4 +- 12 files changed, 398 insertions(+), 116 deletions(-) create mode 100644 src/OpTensorSyncLocal.cpp create mode 100644 src/include/kompute/operations/OpTensorSyncLocal.hpp diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index 57cd16e5e..dd47e9ee3 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -8,5 +8,7 @@ #include "kompute/operations/OpMult.hpp" #include "kompute/operations/OpTensorCreate.hpp" #include "kompute/operations/OpTensorCopy.hpp" +#include "kompute/operations/OpTensorSyncDevice.hpp" +#include "kompute/operations/OpTensorSyncLocal.hpp" #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 2d718b9e6..83e97fdd8 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1150,6 +1150,7 @@ OpAlgoBase::record() } // Record copy from and create barrier for STAGING tensors + // TODO: This only accounts for device tensors need to account for staging and storage for (size_t i = 0; i < this->mTensors.size(); i++) { this->mOutputStagingTensors[i]->recordCopyFrom( this->mCommandBuffer, @@ -1539,15 +1540,17 @@ class OpTensorCreate : 
public OpBase /** * In charge of initialising the primary Tensor as well as the staging * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. + * Primary tensor is of type Device. For staging tensors it performs a + * mapDataIntoHostMemory which would perform immediately as opposed to + * on sequence eval/submission. */ void init() override; /** * Record runs the core actions to create the tensors. For device tensors * it records a copyCommand to move the data from the staging tensor to the - * device tensor. For staging tensors it performs a mapDataIntoHostMemory - * which would perform immediately as opposed to on sequence eval/submission. + * device tensor. The mapping for staging tensors happens in the init function + * not in the record function. */ void record() override; @@ -1567,7 +1570,7 @@ class OpTensorCreate : public OpBase namespace kp { /** - Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. + Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must not receive tensors of type TensorTypes::eStorage. */ class OpTensorCopy : public OpBase { @@ -1588,13 +1591,12 @@ class OpTensorCopy : public OpBase std::vector> tensors); /** - * Default destructor which in this case expects the parent class to free - * the tensors + * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. */ ~OpTensorCopy() override; /** - * TODO + * Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage. 
*/ void init() override; @@ -1612,3 +1614,103 @@ class OpTensorCopy : public OpBase }; } // End namespace kp + +namespace kp { + +/** + Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the init (as opposed to during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStorage. +*/ +class OpTensorSyncDevice : public OpBase +{ + public: + OpTensorSyncDevice(); + + /** + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that will be used to create in operation. + */ + OpTensorSyncDevice(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors); + + /** + * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + */ + ~OpTensorSyncDevice() override; + + /** + * Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For staging tensors in host memory, the map is performed during the init function. + */ + void init() override; + + /** + * For device tensors, it records the copy command to the device tensor from the temporary staging tensor. + */ + void record() override; + + /** + * Does not perform any further sync functions. Frees the staging tensors together with their respective memory. 
+ */ + void postSubmit() override; + + private: + // Never owned resources + std::vector> mStagingTensors; +}; + +} // End namespace kp + +namespace kp { + +/** + Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStorage. +*/ +class OpTensorSyncLocal : public OpBase +{ + public: + OpTensorSyncLocal(); + + /** + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that will be used to create in operation. + */ + OpTensorSyncLocal(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors); + + /** + * Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor. + */ + ~OpTensorSyncLocal() override; + + /** + * Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. + */ + void init() override; + + /** + * For device tensors, it records the copy command into the staging tensor from the device tensor. + */ + void record() override; + + /** + * For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory. 
+ */ + void postSubmit() override; + + private: + // Never owned resources + std::vector> mStagingTensors; +}; + +} // End namespace kp diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index c0e1f5046..50eb9c4c1 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -40,7 +40,7 @@ OpTensorCopy::init() throw std::runtime_error("Kompute OpTensorCopy tensor parameter has not been initialized"); } if (tensor->tensorType() == Tensor::TensorTypes::eStorage) { - throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of type storage and hence cannot be used to receive or pass data."); + throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of TensorTypes::eStorage and hence cannot be used to receive or pass data."); } } } diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp index 32688da1f..5bd7317d1 100644 --- a/src/OpTensorCreate.cpp +++ b/src/OpTensorCreate.cpp @@ -23,13 +23,6 @@ OpTensorCreate::OpTensorCreate( OpTensorCreate::~OpTensorCreate() { SPDLOG_DEBUG("Kompute OpTensorCreate destructor started"); - - SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors"); - for (size_t i = 0; i < this->mStagingTensors.size(); i++) { - if (this->mStagingTensors[i]) { - this->mStagingTensors[i]->freeMemoryDestroyGPUResources(); - } - } } void @@ -65,6 +58,8 @@ OpTensorCreate::init() tensor->init( this->mPhysicalDevice, this->mDevice); + tensor->mapDataIntoHostMemory(); + // We push a nullptr when no staging tensor is needed to match // index number in array to have one to one mapping with tensors this->mStagingTensors.push_back(nullptr); @@ -80,9 +75,7 @@ OpTensorCreate::record() for (size_t i = 0; i < this->mTensors.size(); i++) { if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false); - } else if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eStaging) { - this->mTensors[i]->mapDataIntoHostMemory(); - } + 
} } } @@ -91,13 +84,11 @@ OpTensorCreate::postSubmit() { SPDLOG_DEBUG("Kompute OpTensorCreate postSubmit called"); - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { - this->mStagingTensors[i]->mapDataFromHostMemory(); - - this->mTensors[i]->setData(this->mStagingTensors[i]->data()); - } - } + // TODO: Remove and add a test that checks that the memory in + // the staging tensor is actually storing the data + SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors"); + // TODO: This would cause issues if there is no CPU barrier + this->mStagingTensors.clear(); } } diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 1652b88ba..7c87245cd 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -1,4 +1,6 @@ +#include "kompute/Tensor.hpp" + #include "kompute/operations/OpTensorSyncDevice.hpp" namespace kp { @@ -21,8 +23,6 @@ OpTensorSyncDevice::OpTensorSyncDevice( OpTensorSyncDevice::~OpTensorSyncDevice() { SPDLOG_DEBUG("Kompute OpTensorSyncDevice destructor started"); - - SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors"); } void @@ -30,17 +30,37 @@ OpTensorSyncDevice::init() { SPDLOG_DEBUG("Kompute OpTensorSyncDevice init called"); - if (this->mTensors.size() < 2) { + if (this->mTensors.size() < 1) { throw std::runtime_error( - "Kompute OpTensorSyncDevice called with less than 2 tensor"); + "Kompute OpTensorSyncDevice called with less than 1 tensor"); } for (std::shared_ptr tensor: this->mTensors) { - if (!tensor->isInit()) { - throw std::runtime_error("Kompute OpTensorSyncDevice tensor parameter has not been initialized"); + if (tensor->isInit()) { + throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor has already been initialized"); } if (tensor->tensorType() == Tensor::TensorTypes::eStorage) { - throw std::runtime_error("Kompute OpTensorSyncDevice tensor parameter is of type storage and hence cannot be used to 
receive or pass data."); + throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data."); + } + if (tensor->tensorType() == Tensor::TensorTypes::eDevice) { + + std::shared_ptr stagingTensor = std::make_shared( + tensor->data(), Tensor::TensorTypes::eStaging); + + stagingTensor->init( + this->mPhysicalDevice, this->mDevice); + + stagingTensor->mapDataIntoHostMemory(); + + this->mStagingTensors.push_back(stagingTensor); + + } else { + + tensor->mapDataIntoHostMemory(); + + // We push a nullptr when no staging tensor is needed to match + // index number in array to have one to one mapping with tensors + this->mStagingTensors.push_back(nullptr); } } } @@ -50,9 +70,10 @@ OpTensorSyncDevice::record() { SPDLOG_DEBUG("Kompute OpTensorSyncDevice record called"); - // We iterate from the second tensor onwards and record a copy to all - for (size_t i = 1; i < this->mTensors.size(); i++) { - this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mTensors[0], false); + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false); + } } } @@ -61,11 +82,10 @@ OpTensorSyncDevice::postSubmit() { SPDLOG_DEBUG("Kompute OpTensorSyncDevice postSubmit called"); - // Copy the data from the first tensor into all the tensors - for (size_t i = 1; i < this->mTensors.size(); i++) { - this->mTensors[i]->setData(this->mTensors[0]->data()); - } + // Remove all staging tensors as they are not required after operation + SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors"); + // TODO: This would cause issues if there is no CPU barrier + this->mStagingTensors.clear(); } } - diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp new file mode 100644 index 000000000..37037ff99 --- /dev/null +++ b/src/OpTensorSyncLocal.cpp 
@@ -0,0 +1,97 @@ + +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpTensorSyncLocal.hpp" + +namespace kp { + +OpTensorSyncLocal::OpTensorSyncLocal() +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor base"); +} + +OpTensorSyncLocal::OpTensorSyncLocal( + std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors) + : OpBase(physicalDevice, device, commandBuffer, tensors, false) +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); +} + +OpTensorSyncLocal::~OpTensorSyncLocal() +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal destructor started"); +} + +void +OpTensorSyncLocal::init() +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal init called"); + + if (this->mTensors.size() < 1) { + throw std::runtime_error( + "Kompute OpTensorSyncLocal called with less than 1 tensor"); + } + + for (std::shared_ptr tensor: this->mTensors) { + if (!tensor->isInit()) { + throw std::runtime_error("Kompute OpTensorSyncLocal: Tensor has not been initialized"); + } + if (tensor->tensorType() == Tensor::TensorTypes::eStorage) { + throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data."); + } + if (tensor->tensorType() == Tensor::TensorTypes::eDevice) { + + std::shared_ptr stagingTensor = std::make_shared( + tensor->data(), Tensor::TensorTypes::eStaging); + + stagingTensor->init( + this->mPhysicalDevice, this->mDevice); + + this->mStagingTensors.push_back(stagingTensor); + + } else { + + // We push a nullptr when no staging tensor is needed to match + // index number in array to have one to one mapping with tensors + this->mStagingTensors.push_back(nullptr); + } + } +} + +void +OpTensorSyncLocal::record() +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal record called"); + + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + 
this->mStagingTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mTensors[i], true); + } + } +} + +void +OpTensorSyncLocal::postSubmit() +{ + SPDLOG_DEBUG("Kompute OpTensorSyncLocal postSubmit called"); + + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + this->mStagingTensors[i]->mapDataFromHostMemory(); + this->mTensors[i]->setData(this->mStagingTensors[i]->data()); + } else { + this->mTensors[i]->mapDataFromHostMemory(); + } + } + + // Remove all staging tensors as they are not required after operation + SPDLOG_DEBUG("Kompute OpTensorSyncLocal destroying staging tensors"); + // TODO: This would cause issues if there is no CPU barrier + this->mStagingTensors.clear(); +} + +} + diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp index 86e03c442..92c7e607b 100644 --- a/src/include/kompute/operations/OpAlgoBase.hpp +++ b/src/include/kompute/operations/OpAlgoBase.hpp @@ -304,6 +304,7 @@ OpAlgoBase::record() } // Record copy from and create barrier for STAGING tensors + // TODO: This only accounts for device tensors need to account for staging and storage for (size_t i = 0; i < this->mTensors.size(); i++) { this->mOutputStagingTensors[i]->recordCopyFrom( this->mCommandBuffer, diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp index 7360e5bdc..1702237eb 100644 --- a/src/include/kompute/operations/OpTensorCreate.hpp +++ b/src/include/kompute/operations/OpTensorCreate.hpp @@ -42,15 +42,17 @@ class OpTensorCreate : public OpBase /** * In charge of initialising the primary Tensor as well as the staging * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. + * Primary tensor is of type Device. 
For staging tensors it performs a + * mapDataIntoHostMemory which would perform immediately as opposed to + * on sequence eval/submission. */ void init() override; /** * Record runs the core actions to create the tensors. For device tensors * it records a copyCommand to move the data from the staging tensor to the - * device tensor. For staging tensors it performs a mapDataIntoHostMemory - * which would perform immediately as opposed to on sequence eval/submission. + * device tensor. The mapping for staging tensors happens in the init function + * not in the record function. */ void record() override; diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index 14f95a7be..de57e0683 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -35,21 +35,23 @@ class OpTensorSyncDevice : public OpBase ~OpTensorSyncDevice() override; /** - * Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging. + * Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For staging tensors in host memory, the map is performed during the init function. */ void init() override; /** - * Records the copy commands from teh first tensor into all the other tensors provided. Also optionally records a barrier. + * For device tensors, it records the copy command to the device tensor from the temporary staging tensor. */ void record() override; /** - * Copies the local vectors for all the tensors to sync the data with the gpu. + * Does not perform any further sync functions. Frees the staging tensors together with their respective memory. 
*/ void postSubmit() override; private: + // Never owned resources + std::vector> mStagingTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp new file mode 100644 index 000000000..d06629c29 --- /dev/null +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -0,0 +1,60 @@ +#pragma once + +#include "kompute/Core.hpp" + +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +/** + Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStorage. +*/ +class OpTensorSyncLocal : public OpBase +{ + public: + OpTensorSyncLocal(); + + /** + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that will be used to create in operation. + */ + OpTensorSyncLocal(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors); + + /** + * Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor. 
+ */ + ~OpTensorSyncLocal() override; + + /** + * Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. + */ + void init() override; + + /** + * For device tensors, it records the copy command into the staging tensor from the device tensor. + */ + void record() override; + + /** + * For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory. + */ + void postSubmit() override; + + private: + // Never owned resources + std::vector> mStagingTensors; +}; + +} // End namespace kp + + + diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 6b36e078a..9325cacf5 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -8,25 +8,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) { kp::Manager mgr; std::vector testVecA{ 9, 8, 7 }; - - std::shared_ptr tensorA{new kp::Tensor(testVecA)}; - - mgr.evalOpDefault({tensorA}); - - EXPECT_TRUE(tensorA->isInit()); - - EXPECT_EQ(tensorA->data(), testVecA); - - tensorA->freeMemoryDestroyGPUResources(); - EXPECT_FALSE(tensorA->isInit()); -} - -TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) { - - kp::Manager mgr; - - std::vector testVecA{ 9, 8, 7 }; - std::vector testVecB{ 6, 5, 4 }; + std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{new kp::Tensor(testVecA)}; std::shared_ptr tensorB{new kp::Tensor(testVecB)}; @@ -36,76 +18,101 @@ TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) { EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); + mgr.evalOpDefault({tensorA, tensorB}); + + EXPECT_EQ(tensorA->data(), tensorB->data()); + + // Making sure the GPU holds the same data + mgr.evalOpDefault({tensorB}); + EXPECT_EQ(tensorA->data(), tensorB->data()); } -TEST(TestOpTensorCopy, CreateMultipleTensorMultipleOp) 
{ +TEST(TestOpTensorCopy, CopyDeviceToStagingTensor) { kp::Manager mgr; std::vector testVecA{ 9, 8, 7 }; - std::vector testVecB{ 6, 5, 4 }; + std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{new kp::Tensor(testVecA)}; - std::shared_ptr tensorB{new kp::Tensor(testVecB)}; + std::shared_ptr tensorB{new kp::Tensor(testVecB, kp::Tensor::TensorTypes::eStaging)}; - mgr.evalOpDefault({tensorA}); - mgr.evalOpDefault({tensorB}); + mgr.evalOpDefault({tensorA, tensorB}); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); + mgr.evalOpDefault({tensorA, tensorB}); + + EXPECT_EQ(tensorA->data(), tensorB->data()); + + // Making sure the GPU holds the same data + mgr.evalOpDefault({tensorB}); + EXPECT_EQ(tensorA->data(), tensorB->data()); } -TEST(TestOpTensorCopy, ManageTensorMemoryWhenOpTensorCreateDestroyed) { - - std::vector testVecA{ 9, 8, 7 }; - std::vector testVecB{ 6, 5, 4 }; - - std::shared_ptr tensorA{new kp::Tensor(testVecA)}; - std::shared_ptr tensorB{new kp::Tensor(testVecB)}; - - { - kp::Manager mgr; - mgr.evalOpDefault({tensorA}); - mgr.evalOpDefault({tensorB}); - - EXPECT_TRUE(tensorA->isInit()); - EXPECT_TRUE(tensorB->isInit()); - - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); - } - - EXPECT_FALSE(tensorA->isInit()); - EXPECT_FALSE(tensorB->isInit()); -} - -TEST(TestOpTensorCopy, NoErrorIfTensorFreedBefore) { - - std::vector testVecA{ 9, 8, 7 }; - std::vector testVecB{ 6, 5, 4 }; - - std::shared_ptr tensorA{new kp::Tensor(testVecA)}; - std::shared_ptr tensorB{new kp::Tensor(testVecB)}; +TEST(TestOpTensorCopy, CopyStagingToDeviceTensor) { kp::Manager mgr; - mgr.evalOpDefault({tensorA}); - mgr.evalOpDefault({tensorB}); + std::vector testVecA{ 9, 8, 7 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr tensorA{new kp::Tensor(testVecA, kp::Tensor::TensorTypes::eStaging)}; + std::shared_ptr tensorB{new kp::Tensor(testVecB)}; + + 
mgr.evalOpDefault({tensorA, tensorB}); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); - EXPECT_EQ(tensorA->data(), testVecA); - EXPECT_EQ(tensorB->data(), testVecB); + mgr.evalOpDefault({tensorA, tensorB}); - tensorA->freeMemoryDestroyGPUResources(); - tensorB->freeMemoryDestroyGPUResources(); - EXPECT_FALSE(tensorA->isInit()); - EXPECT_FALSE(tensorB->isInit()); + EXPECT_EQ(tensorA->data(), tensorB->data()); + + // Making sure the GPU holds the same data + mgr.evalOpDefault({tensorB}); + EXPECT_EQ(tensorA->data(), tensorB->data()); +} + +TEST(TestOpTensorCopy, CopyStagingToStagingTensor) { + + kp::Manager mgr; + + std::vector testVecA{ 9, 8, 7 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr tensorA{new kp::Tensor(testVecA, kp::Tensor::TensorTypes::eStaging)}; + std::shared_ptr tensorB{new kp::Tensor(testVecB, kp::Tensor::TensorTypes::eStaging)}; + + mgr.evalOpDefault({tensorA, tensorB}); + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); + + mgr.evalOpDefault({tensorA, tensorB}); + + EXPECT_EQ(tensorA->data(), tensorB->data()); + + // Making sure the GPU holds the same data + mgr.evalOpDefault({tensorB}); + EXPECT_EQ(tensorA->data(), tensorB->data()); +} + +TEST(TestOpTensorCopy, SingleTensorShouldFail) { + + kp::Manager mgr; + + std::vector testVecA{ 9, 8, 7 }; + + std::shared_ptr tensorA{new kp::Tensor(testVecA, kp::Tensor::TensorTypes::eStaging)}; + + mgr.evalOpDefault({tensorA}); + + EXPECT_TRUE(tensorA->isInit()); + + EXPECT_THROW( + mgr.evalOpDefault({tensorA}), + std::runtime_error); } diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 5f90dc7bf..58a677820 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -18,7 +18,7 @@ TEST(TestTensor, CopyFromHostData) { vecA, kp::Tensor::TensorTypes::eStaging); std::shared_ptr tensorB = std::make_shared( - vecA, + vecB, kp::Tensor::TensorTypes::eStaging); kp::Manager mgr; @@ -35,8 +35,6 @@ TEST(TestTensor, CopyFromHostData) { sq->end(); sq->eval(); - - 
tensorB->mapDataFromHostMemory(); } EXPECT_EQ(tensorA->data(), tensorB->data());