Added optensorSyncDevice and optensorcopy tests

This commit is contained in:
Alejandro Saucedo 2020-09-06 12:52:45 +01:00
parent ec89fc6d56
commit 1b4e4b6b18
12 changed files with 398 additions and 116 deletions

View file

@ -8,5 +8,7 @@
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpTensorCreate.hpp"
#include "kompute/operations/OpTensorCopy.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"

View file

@ -1150,6 +1150,7 @@ OpAlgoBase<tX, tY, tZ>::record()
}
// Record copy from and create barrier for STAGING tensors
// TODO: This only accounts for device tensors need to account for staging and storage
for (size_t i = 0; i < this->mTensors.size(); i++) {
this->mOutputStagingTensors[i]->recordCopyFrom(
this->mCommandBuffer,
@ -1539,15 +1540,17 @@ class OpTensorCreate : public OpBase
/**
* In charge of initialising the primary Tensor as well as the staging
* tensor as required. It will only initialise a staging tensor if the
* Primary tensor is of type Device.
* Primary tensor is of type Device. For staging tensors it performs a
* mapDataIntoHostMemory which would perform immediately as opposed to
* on sequence eval/submission.
*/
void init() override;
/**
* Record runs the core actions to create the tensors. For device tensors
* it records a copyCommand to move the data from the staging tensor to the
* device tensor. For staging tensors it performs a mapDataIntoHostMemory
* which would perform immediately as opposed to on sequence eval/submission.
* device tensor. The mapping for staging tensors happens in the init function
* not in the record function.
*/
void record() override;
@ -1567,7 +1570,7 @@ class OpTensorCreate : public OpBase
namespace kp {
/**
Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it.
Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive initialised tensors that are not of type TensorTypes::eStorage.
*/
class OpTensorCopy : public OpBase
{
@ -1588,13 +1591,12 @@ class OpTensorCopy : public OpBase
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor which in this case expects the parent class to free
* the tensors
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorCopy() override;
/**
* TODO
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
*/
void init() override;
@ -1612,3 +1614,103 @@ class OpTensorCopy : public OpBase
};
} // End namespace kp
namespace kp {
/**
Operation that syncs the tensor's device memory by mapping the local data into it. For TensorTypes::eDevice it will use a temporary staging tensor to perform the copy. For TensorTypes::eStaging it will only map the local data into host memory, which is executed immediately during init (as opposed to during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
*/
class OpTensorSyncDevice : public OpBase
{
public:
/**
* Constructor that takes no parameters.
*/
OpTensorSyncDevice();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
* @param physicalDevice Vulkan physical device, passed to the staging tensors when they are initialised
* @param device Vulkan logical device, passed to the staging tensors when they are initialised
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors whose local data will be synced into device memory.
*/
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. It does not release the tensors passed to the operation; the temporary staging tensors are already dropped in postSubmit.
*/
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For device tensors a temporary staging tensor is created and mapped; for staging tensors in host memory, the map is performed directly during the init function.
*/
void init() override;
/**
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor. Nothing is recorded for staging tensors.
*/
void record() override;
/**
* Does not perform any further sync functions. Drops the temporary staging tensors as they are not required after the operation.
*/
void postSubmit() override;
private:
// Temporary staging tensors created in init (one slot per tensor, nullptr
// when no staging tensor is needed) and cleared in postSubmit
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp
namespace kp {
/**
Operation that syncs the tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a temporary staging tensor to perform the copy. For TensorTypes::eStaging it will only perform a map from host memory, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
*/
class OpTensorSyncLocal : public OpBase
{
public:
/**
* Constructor that takes no parameters.
*/
OpTensorSyncLocal();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
* @param physicalDevice Vulkan physical device, passed to the staging tensors when they are initialised
* @param device Vulkan logical device, passed to the staging tensors when they are initialised
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors whose local data will be refreshed from device memory.
*/
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. The staging tensors created by this class are dropped in postSubmit, before the destructor is reached.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For device tensors it also creates and initialises a temporary staging tensor.
*/
void init() override;
/**
* For device tensors, it records the copy command into the staging tensor from the device tensor. Nothing is recorded for staging tensors.
*/
void record() override;
/**
* For device tensors it maps the staging tensor data from host memory and sets it as the data of the tensor. For staging tensors it performs the map from host memory on the tensor itself. Finally drops the temporary staging tensors.
*/
void postSubmit() override;
private:
// Temporary staging tensors created in init (one slot per tensor, nullptr
// when no staging tensor is needed) and cleared in postSubmit
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -40,7 +40,7 @@ OpTensorCopy::init()
throw std::runtime_error("Kompute OpTensorCopy tensor parameter has not been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of type storage and hence cannot be used to receive or pass data.");
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of TensorTypes::eStorage and hence cannot be used to receive or pass data.");
}
}
}

View file

@ -23,13 +23,6 @@ OpTensorCreate::OpTensorCreate(
OpTensorCreate::~OpTensorCreate()
{
SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors");
for (size_t i = 0; i < this->mStagingTensors.size(); i++) {
if (this->mStagingTensors[i]) {
this->mStagingTensors[i]->freeMemoryDestroyGPUResources();
}
}
}
void
@ -65,6 +58,8 @@ OpTensorCreate::init()
tensor->init(
this->mPhysicalDevice, this->mDevice);
tensor->mapDataIntoHostMemory();
// We push a nullptr when no staging tensor is needed to match
// index number in array to have one to one mapping with tensors
this->mStagingTensors.push_back(nullptr);
@ -80,9 +75,7 @@ OpTensorCreate::record()
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false);
} else if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eStaging) {
this->mTensors[i]->mapDataIntoHostMemory();
}
}
}
}
@ -91,13 +84,11 @@ OpTensorCreate::postSubmit()
{
SPDLOG_DEBUG("Kompute OpTensorCreate postSubmit called");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->mapDataFromHostMemory();
this->mTensors[i]->setData(this->mStagingTensors[i]->data());
}
}
// TODO: Remove and add a test that checks that the memory in
// the staging tensor is actually storing the data
SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors");
// TODO: This would cause issues if there is no CPU barrier
this->mStagingTensors.clear();
}
}

View file

@ -1,4 +1,6 @@
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
namespace kp {
@ -21,8 +23,6 @@ OpTensorSyncDevice::OpTensorSyncDevice(
OpTensorSyncDevice::~OpTensorSyncDevice()
{
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors");
}
void
@ -30,17 +30,37 @@ OpTensorSyncDevice::init()
{
    // Validates the tensors handed to the operation and prepares the data
    // that has to reach the device:
    //  - eDevice tensors get a temporary staging tensor that holds a copy of
    //    the local data; the actual copy is recorded later in record().
    //  - eStaging tensors are mapped into host memory right away.
    // Throws std::runtime_error when no tensor is provided, when a tensor
    // has not been initialised, or when a tensor is of type eStorage.
    SPDLOG_DEBUG("Kompute OpTensorSyncDevice init called");

    if (this->mTensors.size() < 1) {
        throw std::runtime_error(
          "Kompute OpTensorSyncDevice called with less than 1 tensor");
    }

    for (std::shared_ptr<Tensor> tensor : this->mTensors) {
        // The tensor must already be initialised: this operation only syncs
        // data into existing tensors, it does not create them.
        if (!tensor->isInit()) {
            throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor has not been initialized");
        }
        if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
            throw std::runtime_error("Kompute OpTensorSyncDevice tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data.");
        }
        if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
            std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
              tensor->data(), Tensor::TensorTypes::eStaging);

            stagingTensor->init(this->mPhysicalDevice, this->mDevice);
            stagingTensor->mapDataIntoHostMemory();

            this->mStagingTensors.push_back(stagingTensor);
        } else {
            tensor->mapDataIntoHostMemory();

            // We push a nullptr when no staging tensor is needed to match
            // index number in array to have one to one mapping with tensors
            this->mStagingTensors.push_back(nullptr);
        }
    }
}
@ -50,9 +70,10 @@ OpTensorSyncDevice::record()
{
SPDLOG_DEBUG("Kompute OpTensorSyncDevice record called");
// We iterate from the second tensor onwards and record a copy to all
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mTensors[0], false);
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false);
}
}
}
@ -61,11 +82,10 @@ OpTensorSyncDevice::postSubmit()
{
SPDLOG_DEBUG("Kompute OpTensorSyncDevice postSubmit called");
// Copy the data from the first tensor into all the tensors
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->setData(this->mTensors[0]->data());
}
// Remove all staging tensors as they are not required after operation
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors");
// TODO: This would cause issues if there is no CPU barrier
this->mStagingTensors.clear();
}
}

97
src/OpTensorSyncLocal.cpp Normal file
View file

@ -0,0 +1,97 @@
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"
namespace kp {
/**
 * Parameterless constructor; its only action is to emit a debug log entry.
 */
OpTensorSyncLocal::OpTensorSyncLocal()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor base");
}
/**
 * Constructor that forwards the Vulkan resources and the tensors to OpBase
 * and emits a debug log entry.
 *
 * NOTE(review): the trailing `false` handed to OpBase presumably tells the
 * parent not to free the tensors — confirm against OpBase.
 */
OpTensorSyncLocal::OpTensorSyncLocal(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  std::vector<std::shared_ptr<Tensor>> tensors)
  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
}
/**
 * Destructor; only emits a debug log entry. The staging tensors are cleared
 * in postSubmit rather than here.
 */
OpTensorSyncLocal::~OpTensorSyncLocal()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal destructor started");
}
/**
 * Validates the tensors and fills mStagingTensors with exactly one slot per
 * tensor: an initialised staging tensor for eDevice tensors and a nullptr
 * placeholder otherwise, so indices line up with mTensors.
 *
 * Throws std::runtime_error when no tensor was provided, when a tensor is
 * not initialised, or when a tensor is of type eStorage.
 */
void
OpTensorSyncLocal::init()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal init called");

    if (this->mTensors.empty()) {
        throw std::runtime_error(
          "Kompute OpTensorSyncLocal called with less than 1 tensor");
    }

    for (const std::shared_ptr<Tensor>& current : this->mTensors) {
        if (!current->isInit()) {
            throw std::runtime_error("Kompute OpTensorSyncLocal: Tensor has not been initialized");
        }

        const auto kind = current->tensorType();

        if (kind == Tensor::TensorTypes::eStorage) {
            throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data.");
        }

        if (kind != Tensor::TensorTypes::eDevice) {
            // No staging tensor required: keep a placeholder so that the
            // staging vector stays index-aligned with the tensors
            this->mStagingTensors.push_back(nullptr);
            continue;
        }

        // Device tensor: create the staging tensor that will receive the
        // device data when the recorded copy is executed
        auto staging = std::make_shared<Tensor>(
          current->data(), Tensor::TensorTypes::eStaging);
        staging->init(this->mPhysicalDevice, this->mDevice);
        this->mStagingTensors.push_back(staging);
    }
}
/**
 * Records, for every eDevice tensor, a copy from the device tensor into the
 * staging tensor stored at the same index. Other tensor types are skipped.
 */
void
OpTensorSyncLocal::record()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal record called");

    const size_t tensorCount = this->mTensors.size();
    for (size_t idx = 0; idx < tensorCount; ++idx) {
        if (this->mTensors[idx]->tensorType() != Tensor::TensorTypes::eDevice) {
            continue;
        }
        // NOTE(review): the final `true` presumably requests a barrier
        // after the copy — confirm against Tensor::recordCopyFrom
        this->mStagingTensors[idx]->recordCopyFrom(
          this->mCommandBuffer, this->mTensors[idx], true);
    }
}
/**
 * Brings the data back into the local vectors once the commands have been
 * submitted: eDevice tensors take the data mapped out of their staging
 * tensor, every other tensor maps its own host memory. The staging tensors
 * are dropped afterwards.
 */
void
OpTensorSyncLocal::postSubmit()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal postSubmit called");

    const size_t tensorCount = this->mTensors.size();
    for (size_t idx = 0; idx < tensorCount; ++idx) {
        if (this->mTensors[idx]->tensorType() != Tensor::TensorTypes::eDevice) {
            this->mTensors[idx]->mapDataFromHostMemory();
            continue;
        }
        this->mStagingTensors[idx]->mapDataFromHostMemory();
        this->mTensors[idx]->setData(this->mStagingTensors[idx]->data());
    }

    // Staging tensors are not needed once the data has been synced
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal destroying staging tensors");
    // TODO: This would cause issues if there is no CPU barrier
    this->mStagingTensors.clear();
}
}

View file

@ -304,6 +304,7 @@ OpAlgoBase<tX, tY, tZ>::record()
}
// Record copy from and create barrier for STAGING tensors
// TODO: This only accounts for device tensors need to account for staging and storage
for (size_t i = 0; i < this->mTensors.size(); i++) {
this->mOutputStagingTensors[i]->recordCopyFrom(
this->mCommandBuffer,

View file

@ -42,15 +42,17 @@ class OpTensorCreate : public OpBase
/**
* In charge of initialising the primary Tensor as well as the staging
* tensor as required. It will only initialise a staging tensor if the
* Primary tensor is of type Device.
* Primary tensor is of type Device. For staging tensors it performs a
* mapDataIntoHostMemory which would perform immediately as opposed to
* on sequence eval/submission.
*/
void init() override;
/**
* Record runs the core actions to create the tensors. For device tensors
* it records a copyCommand to move the data from the staging tensor to the
* device tensor. For staging tensors it performs a mapDataIntoHostMemory
* which would perform immediately as opposed to on sequence eval/submission.
* device tensor. The mapping for staging tensors happens in the init function
* not in the record function.
*/
void record() override;

View file

@ -35,21 +35,23 @@ class OpTensorSyncDevice : public OpBase
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging.
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For staging tensors in host memory, the map is performed during the init function.
*/
void init() override;
/**
* Records the copy commands from teh first tensor into all the other tensors provided. Also optionally records a barrier.
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
*/
void record() override;
/**
* Copies the local vectors for all the tensors to sync the data with the gpu.
* Does not perform any further sync functions. Frees the staging tensors together with their respective memory.
*/
void postSubmit() override;
private:
// Temporary staging tensors created in init and cleared in postSubmit
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -0,0 +1,60 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
Operation that syncs the tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a temporary staging tensor to perform the copy. For TensorTypes::eStaging it will only perform a map from host memory, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
*/
class OpTensorSyncLocal : public OpBase
{
public:
/**
* Constructor that takes no parameters.
*/
OpTensorSyncLocal();
/**
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
*
* @param physicalDevice Vulkan physical device, passed to the staging tensors when they are initialised
* @param device Vulkan logical device, passed to the staging tensors when they are initialised
* @param commandBuffer Vulkan Command Buffer to record commands into
* @param tensors Tensors whose local data will be refreshed from device memory.
*/
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
std::shared_ptr<vk::Device> device,
std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. The staging tensors created by this class are dropped in postSubmit, before the destructor is reached.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For device tensors it also creates and initialises a temporary staging tensor.
*/
void init() override;
/**
* For device tensors, it records the copy command into the staging tensor from the device tensor. Nothing is recorded for staging tensors.
*/
void record() override;
/**
* For device tensors it maps the staging tensor data from host memory and sets it as the data of the tensor. For staging tensors it performs the map from host memory on the tensor itself. Finally drops the temporary staging tensors.
*/
void postSubmit() override;
private:
// Temporary staging tensors created in init (one slot per tensor, nullptr
// when no staging tensor is needed) and cleared in postSubmit
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -8,25 +8,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) {
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
EXPECT_TRUE(tensorA->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
tensorA->freeMemoryDestroyGPUResources();
EXPECT_FALSE(tensorA->isInit());
}
TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) {
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 6, 5, 4 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
@ -36,76 +18,101 @@ TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) {
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
EXPECT_EQ(tensorA->data(), tensorB->data());
// Making sure the GPU holds the same data
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
EXPECT_EQ(tensorA->data(), tensorB->data());
}
TEST(TestOpTensorCopy, CreateMultipleTensorMultipleOp) {
TEST(TestOpTensorCopy, CopyDeviceToStagingTensor) {
kp::Manager mgr;
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 6, 5, 4 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB, kp::Tensor::TensorTypes::eStaging)};
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA, tensorB});
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
EXPECT_EQ(tensorA->data(), tensorB->data());
// Making sure the GPU holds the same data
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
EXPECT_EQ(tensorA->data(), tensorB->data());
}
TEST(TestOpTensorCopy, ManageTensorMemoryWhenOpTensorCreateDestroyed) {
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 6, 5, 4 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
{
kp::Manager mgr;
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
}
EXPECT_FALSE(tensorA->isInit());
EXPECT_FALSE(tensorB->isInit());
}
TEST(TestOpTensorCopy, NoErrorIfTensorFreedBefore) {
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 6, 5, 4 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
TEST(TestOpTensorCopy, CopyStagingToDeviceTensor) {
kp::Manager mgr;
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
std::vector<float> testVecA{ 9, 8, 7 };
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA, kp::Tensor::TensorTypes::eStaging)};
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA, tensorB});
EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), testVecA);
EXPECT_EQ(tensorB->data(), testVecB);
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
tensorA->freeMemoryDestroyGPUResources();
tensorB->freeMemoryDestroyGPUResources();
EXPECT_FALSE(tensorA->isInit());
EXPECT_FALSE(tensorB->isInit());
EXPECT_EQ(tensorA->data(), tensorB->data());
// Making sure the GPU holds the same data
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
EXPECT_EQ(tensorA->data(), tensorB->data());
}
// Copies between two staging tensors and verifies that the destination holds
// the source values, both right after the copy and after an explicit sync of
// the destination back into its local vector.
TEST(TestOpTensorCopy, CopyStagingToStagingTensor) {

    kp::Manager mgr;

    std::vector<float> testVecA{ 9, 8, 7 };
    std::vector<float> testVecB{ 0, 0, 0 };

    auto tensorA = std::make_shared<kp::Tensor>(
      testVecA, kp::Tensor::TensorTypes::eStaging);
    auto tensorB = std::make_shared<kp::Tensor>(
      testVecB, kp::Tensor::TensorTypes::eStaging);

    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());

    mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorB });

    EXPECT_EQ(tensorA->data(), tensorB->data());

    // Making sure the GPU holds the same data
    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB });

    EXPECT_EQ(tensorA->data(), tensorB->data());
}
// OpTensorCopy needs a source and at least one destination, so evaluating it
// with a single tensor is expected to throw std::runtime_error.
TEST(TestOpTensorCopy, SingleTensorShouldFail) {

    kp::Manager mgr;

    std::vector<float> testVecA{ 9, 8, 7 };

    auto tensorA = std::make_shared<kp::Tensor>(
      testVecA, kp::Tensor::TensorTypes::eStaging);

    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });

    EXPECT_TRUE(tensorA->isInit());

    EXPECT_THROW(
      mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA }),
      std::runtime_error);
}

View file

@ -18,7 +18,7 @@ TEST(TestTensor, CopyFromHostData) {
vecA,
kp::Tensor::TensorTypes::eStaging);
std::shared_ptr<kp::Tensor> tensorB = std::make_shared<kp::Tensor>(
vecA,
vecB,
kp::Tensor::TensorTypes::eStaging);
kp::Manager mgr;
@ -35,8 +35,6 @@ TEST(TestTensor, CopyFromHostData) {
sq->end();
sq->eval();
tensorB->mapDataFromHostMemory();
}
EXPECT_EQ(tensorA->data(), tensorB->data());