Added optensorSyncDevice and optensorcopy tests
This commit is contained in:
parent
ec89fc6d56
commit
1b4e4b6b18
12 changed files with 398 additions and 116 deletions
|
|
@ -8,5 +8,7 @@
|
|||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpTensorCreate.hpp"
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
|
|
|||
|
|
@ -1150,6 +1150,7 @@ OpAlgoBase<tX, tY, tZ>::record()
|
|||
}
|
||||
|
||||
// Record copy from and create barrier for STAGING tensors
|
||||
// TODO: This only accounts for device tensors need to account for staging and storage
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
this->mOutputStagingTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer,
|
||||
|
|
@ -1539,15 +1540,17 @@ class OpTensorCreate : public OpBase
|
|||
/**
|
||||
* In charge of initialising the primary Tensor as well as the staging
|
||||
* tensor as required. It will only initialise a staging tensor if the
|
||||
* Primary tensor is of type Device.
|
||||
* Primary tensor is of type Device. For staging tensors it performs a
|
||||
* mapDataIntoHostMemory which would perform immediately as opposed to
|
||||
* on sequence eval/submission.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. For staging tensors it performs a mapDataIntoHostMemory
|
||||
* which would perform immediately as opposed to on sequence eval/submission.
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
|
|
@ -1567,7 +1570,7 @@ class OpTensorCreate : public OpBase
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it.
|
||||
Operation that copies the data from the first tensor to the rest of the tensors provided, using a record command for all the vectors. This operation does not own/manage the memory of the tensors passed to it. The operation must only receive tensors of type TensorTypes::eDevice or TensorTypes::eStaging (tensors of type TensorTypes::eStorage are rejected).
|
||||
*/
|
||||
class OpTensorCopy : public OpBase
|
||||
{
|
||||
|
|
@ -1588,13 +1591,12 @@ class OpTensorCopy : public OpBase
|
|||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor which in this case expects the parent class to free
|
||||
* the tensors
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCopy() override;
|
||||
|
||||
/**
|
||||
* TODO
|
||||
* Performs basic checks such as ensuring there are at least two tensors provided, that they are initialised and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
|
|
@ -1612,3 +1614,103 @@ class OpTensorCopy : public OpBase
|
|||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs the tensor's device memory by mapping local data into it. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the init (as opposed to during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
|
||||
*/
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncDevice();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncDevice(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For staging tensors in host memory, the map is performed during the init function.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Does not perform any further sync functions. Frees the staging tensors together with their respective memory.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Temporary staging tensors: created in init and released in postSubmit
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs the tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
|
||||
*/
|
||||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncLocal();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command into the staging tensor from the device tensor.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Temporary staging tensors: created in init and released in postSubmit
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ OpTensorCopy::init()
|
|||
throw std::runtime_error("Kompute OpTensorCopy tensor parameter has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of type storage and hence cannot be used to receive or pass data.");
|
||||
throw std::runtime_error("Kompute OpTensorCopy tensor parameter is of TensorTypes::eStorage and hence cannot be used to receive or pass data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,13 +23,6 @@ OpTensorCreate::OpTensorCreate(
|
|||
OpTensorCreate::~OpTensorCreate()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors");
|
||||
for (size_t i = 0; i < this->mStagingTensors.size(); i++) {
|
||||
if (this->mStagingTensors[i]) {
|
||||
this->mStagingTensors[i]->freeMemoryDestroyGPUResources();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -65,6 +58,8 @@ OpTensorCreate::init()
|
|||
tensor->init(
|
||||
this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
tensor->mapDataIntoHostMemory();
|
||||
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
|
|
@ -80,9 +75,7 @@ OpTensorCreate::record()
|
|||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false);
|
||||
} else if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eStaging) {
|
||||
this->mTensors[i]->mapDataIntoHostMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -91,13 +84,11 @@ OpTensorCreate::postSubmit()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate postSubmit called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->mapDataFromHostMemory();
|
||||
|
||||
this->mTensors[i]->setData(this->mStagingTensors[i]->data());
|
||||
}
|
||||
}
|
||||
// TODO: Remove and add a test that checks that the memory in
|
||||
// the staging tensor is actually storing the data
|
||||
SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors");
|
||||
// TODO: This would cause issues if there is no CPU barrier
|
||||
this->mStagingTensors.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
|
@ -21,8 +23,6 @@ OpTensorSyncDevice::OpTensorSyncDevice(
|
|||
OpTensorSyncDevice::~OpTensorSyncDevice()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors");
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -30,17 +30,37 @@ OpTensorSyncDevice::init()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice init called");
|
||||
|
||||
if (this->mTensors.size() < 2) {
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 2 tensor");
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor: this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncDevice tensor parameter has not been initialized");
|
||||
if (tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor has already been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncDevice tensor parameter is of type storage and hence cannot be used to receive or pass data.");
|
||||
throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data.");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
|
||||
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
|
||||
tensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
stagingTensor->init(
|
||||
this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
stagingTensor->mapDataIntoHostMemory();
|
||||
|
||||
this->mStagingTensors.push_back(stagingTensor);
|
||||
|
||||
} else {
|
||||
|
||||
tensor->mapDataIntoHostMemory();
|
||||
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -50,9 +70,10 @@ OpTensorSyncDevice::record()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice record called");
|
||||
|
||||
// We iterate from the second tensor onwards and record a copy to all
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mTensors[0], false);
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mStagingTensors[i], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -61,11 +82,10 @@ OpTensorSyncDevice::postSubmit()
|
|||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice postSubmit called");
|
||||
|
||||
// Copy the data from the first tensor into all the tensors
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->setData(this->mTensors[0]->data());
|
||||
}
|
||||
// Remove all staging tensors as they are not required after operation
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors");
|
||||
// TODO: This would cause issues if there is no CPU barrier
|
||||
this->mStagingTensors.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
97
src/OpTensorSyncLocal.cpp
Normal file
97
src/OpTensorSyncLocal.cpp
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
 * Base constructor: receives no resources and only emits a debug log entry.
 */
OpTensorSyncLocal::OpTensorSyncLocal()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor base");
}
|
||||
|
||||
/**
 * Constructor with parameters: forwards the vulkan resources and the tensors
 * to OpBase unchanged.
 *
 * @param physicalDevice Vulkan physical device
 * @param device Vulkan logical device
 * @param commandBuffer Vulkan command buffer to record commands into
 * @param tensors Tensors that will be used in the operation
 */
OpTensorSyncLocal::OpTensorSyncLocal(
  std::shared_ptr<vk::PhysicalDevice> physicalDevice,
  std::shared_ptr<vk::Device> device,
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  std::vector<std::shared_ptr<Tensor>> tensors)
  // NOTE(review): the trailing `false` presumably tells OpBase not to free
  // the tensors -- confirm against the OpBase constructor.
  : OpBase(physicalDevice, device, commandBuffer, tensors, false)
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
}
|
||||
|
||||
/**
 * Destructor: only logs. The staging tensors are already cleared in
 * postSubmit, so nothing is released here.
 */
OpTensorSyncLocal::~OpTensorSyncLocal()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncLocal destructor started");
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal init called");
|
||||
|
||||
if (this->mTensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
for (std::shared_ptr<Tensor> tensor: this->mTensors) {
|
||||
if (!tensor->isInit()) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncLocal: Tensor has not been initialized");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
|
||||
throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data.");
|
||||
}
|
||||
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
|
||||
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
|
||||
tensor->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
stagingTensor->init(
|
||||
this->mPhysicalDevice, this->mDevice);
|
||||
|
||||
this->mStagingTensors.push_back(stagingTensor);
|
||||
|
||||
} else {
|
||||
|
||||
// We push a nullptr when no staging tensor is needed to match
|
||||
// index number in array to have one to one mapping with tensors
|
||||
this->mStagingTensors.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->recordCopyFrom(this->mCommandBuffer, this->mTensors[i], true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal postSubmit called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mStagingTensors[i]->mapDataFromHostMemory();
|
||||
this->mTensors[i]->setData(this->mStagingTensors[i]->data());
|
||||
} else {
|
||||
this->mTensors[i]->mapDataFromHostMemory();
|
||||
}
|
||||
}
|
||||
|
||||
// Remove all staging tensors as they are not required after operation
|
||||
SPDLOG_DEBUG("Kompute OpTensorSyncLocal destroying staging tensors");
|
||||
// TODO: This would cause issues if there is no CPU barrier
|
||||
this->mStagingTensors.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -304,6 +304,7 @@ OpAlgoBase<tX, tY, tZ>::record()
|
|||
}
|
||||
|
||||
// Record copy from and create barrier for STAGING tensors
|
||||
// TODO: This only accounts for device tensors need to account for staging and storage
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
this->mOutputStagingTensors[i]->recordCopyFrom(
|
||||
this->mCommandBuffer,
|
||||
|
|
|
|||
|
|
@ -42,15 +42,17 @@ class OpTensorCreate : public OpBase
|
|||
/**
|
||||
* In charge of initialising the primary Tensor as well as the staging
|
||||
* tensor as required. It will only initialise a staging tensor if the
|
||||
* Primary tensor is of type Device.
|
||||
* Primary tensor is of type Device. For staging tensors it performs a
|
||||
* mapDataIntoHostMemory which would perform immediately as opposed to
|
||||
* on sequence eval/submission.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Record runs the core actions to create the tensors. For device tensors
|
||||
* it records a copyCommand to move the data from the staging tensor to the
|
||||
* device tensor. For staging tensors it performs a mapDataIntoHostMemory
|
||||
* which would perform immediately as opposed to on sequence eval/submission.
|
||||
* device tensor. The mapping for staging tensors happens in the init function
|
||||
* not in the record function.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
|
|
|
|||
|
|
@ -35,21 +35,23 @@ class OpTensorSyncDevice : public OpBase
|
|||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging.
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage. For staging tensors in host memory, the map is performed during the init function.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from teh first tensor into all the other tensors provided. Also optionally records a barrier.
|
||||
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Copies the local vectors for all the tensors to sync the data with the gpu.
|
||||
* Does not perform any further sync functions. Frees the staging tensors together with their respective memory.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Temporary staging tensors: created in init and released in postSubmit
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
|
|
|||
60
src/include/kompute/operations/OpTensorSyncLocal.hpp
Normal file
60
src/include/kompute/operations/OpTensorSyncLocal.hpp
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
Operation that syncs the tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This operation cannot be carried out for TensorTypes::eStorage.
|
||||
*/
|
||||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
OpTensorSyncLocal();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncLocal(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>> tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTypes::eStorage.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command into the staging tensor from the device tensor.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// Temporary staging tensors: created in init and released in postSubmit
|
||||
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
||||
|
||||
|
|
@ -8,25 +8,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) {
|
|||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
|
||||
tensorA->freeMemoryDestroyGPUResources();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) {
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
|
||||
|
|
@ -36,76 +18,101 @@ TEST(TestOpTensorCopy, CreateMultipleTensorSingleOp) {
|
|||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, CreateMultipleTensorMultipleOp) {
|
||||
TEST(TestOpTensorCopy, CopyDeviceToStagingTensor) {
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB, kp::Tensor::TensorTypes::eStaging)};
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA, tensorB});
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, ManageTensorMemoryWhenOpTensorCreateDestroyed) {
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
|
||||
|
||||
{
|
||||
kp::Manager mgr;
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
}
|
||||
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
}
|
||||
|
||||
TEST(TestOpTensorCopy, NoErrorIfTensorFreedBefore) {
|
||||
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 6, 5, 4 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
|
||||
TEST(TestOpTensorCopy, CopyStagingToDeviceTensor) {
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorB});
|
||||
std::vector<float> testVecA{ 9, 8, 7 };
|
||||
std::vector<float> testVecB{ 0, 0, 0 };
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorA{new kp::Tensor(testVecA, kp::Tensor::TensorTypes::eStaging)};
|
||||
std::shared_ptr<kp::Tensor> tensorB{new kp::Tensor(testVecB)};
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorCreate>({tensorA, tensorB});
|
||||
|
||||
EXPECT_TRUE(tensorA->isInit());
|
||||
EXPECT_TRUE(tensorB->isInit());
|
||||
|
||||
EXPECT_EQ(tensorA->data(), testVecA);
|
||||
EXPECT_EQ(tensorB->data(), testVecB);
|
||||
mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});
|
||||
|
||||
tensorA->freeMemoryDestroyGPUResources();
|
||||
tensorB->freeMemoryDestroyGPUResources();
|
||||
EXPECT_FALSE(tensorA->isInit());
|
||||
EXPECT_FALSE(tensorB->isInit());
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
||||
// Making sure the GPU holds the same data
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
}
|
||||
|
||||
// Copies between two staging tensors and checks that the local data matches
// both right after the copy and after syncing the target back to local.
TEST(TestOpTensorCopy, CopyStagingToStagingTensor) {

    kp::Manager mgr;

    std::vector<float> sourceValues{ 9, 8, 7 };
    std::vector<float> targetValues{ 0, 0, 0 };

    std::shared_ptr<kp::Tensor> tensorA = std::make_shared<kp::Tensor>(
      sourceValues, kp::Tensor::TensorTypes::eStaging);
    std::shared_ptr<kp::Tensor> tensorB = std::make_shared<kp::Tensor>(
      targetValues, kp::Tensor::TensorTypes::eStaging);

    mgr.evalOpDefault<kp::OpTensorCreate>({tensorA, tensorB});

    EXPECT_TRUE(tensorA->isInit());
    EXPECT_TRUE(tensorB->isInit());

    mgr.evalOpDefault<kp::OpTensorCopy>({tensorA, tensorB});

    EXPECT_EQ(tensorA->data(), tensorB->data());

    // Making sure the GPU holds the same data
    mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorB});
    EXPECT_EQ(tensorA->data(), tensorB->data());
}
|
||||
|
||||
// OpTensorCopy is expected to throw std::runtime_error when it is given
// only one tensor.
TEST(TestOpTensorCopy, SingleTensorShouldFail) {

    kp::Manager mgr;

    std::vector<float> sourceValues{ 9, 8, 7 };

    std::shared_ptr<kp::Tensor> tensorA = std::make_shared<kp::Tensor>(
      sourceValues, kp::Tensor::TensorTypes::eStaging);

    mgr.evalOpDefault<kp::OpTensorCreate>({tensorA});

    EXPECT_TRUE(tensorA->isInit());

    EXPECT_THROW(
      mgr.evalOpDefault<kp::OpTensorCopy>({tensorA}),
      std::runtime_error);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ TEST(TestTensor, CopyFromHostData) {
|
|||
vecA,
|
||||
kp::Tensor::TensorTypes::eStaging);
|
||||
std::shared_ptr<kp::Tensor> tensorB = std::make_shared<kp::Tensor>(
|
||||
vecA,
|
||||
vecB,
|
||||
kp::Tensor::TensorTypes::eStaging);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
|
@ -35,8 +35,6 @@ TEST(TestTensor, CopyFromHostData) {
|
|||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
|
||||
tensorB->mapDataFromHostMemory();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), tensorB->data());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue