Merge pull request #137 from EthicalML/revamp_memory_tensor_mgmt

Removed Staging Tensors in favour of having two buffer & memory in a Tensor to minimise data transfer
This commit is contained in:
Alejandro Saucedo 2021-02-09 07:55:10 +00:00 committed by GitHub
commit 698883992f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 361 additions and 238 deletions

View file

@ -513,10 +513,10 @@ function not in the record function.)doc";
static const char *__doc_kp_OpTensorSyncDevice =
R"doc(Operation that syncs tensor's device by mapping local data into the
device memory. For TensorTypes::eDevice it will use a staging tensor
to perform the copy. For TensorTypes::eStaging it will only copy the
to perform the copy. For TensorTypes::eHost it will only copy the
data and perform a map, which will be executed during the record (as
opposed to during the sequence eval/submit). This function cannot be
carried out for TensorTypes::eStaging.)doc";
carried out for TensorTypes::eHost.)doc";
static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc()doc";
@ -533,7 +533,7 @@ queues @param device Vulkan logical device for passing to Algorithm
static const char *__doc_kp_OpTensorSyncDevice_init =
R"doc(Performs basic checks such as ensuring that there is at least one
tensor provided, that they are initialized and that they are not of
type TensorTpes::eStaging. For staging tensors in host memory, the map
type TensorTpes::eHost. For staging tensors in host memory, the map
is performed during the init function.)doc";
static const char *__doc_kp_OpTensorSyncDevice_mStagingTensors = R"doc()doc";
@ -549,11 +549,11 @@ from the temporary staging tensor.)doc";
static const char *__doc_kp_OpTensorSyncLocal =
R"doc(Operation that syncs tensor's local data by mapping the data from
device memory into the local vector. For TensorTypes::eDevice it will
use a staging tensor to perform the copy. For TensorTypes::eStaging it
use a staging tensor to perform the copy. For TensorTypes::eHost it
will only copy the data and perform a map, which will be executed
during the postSubmit (there will be no copy during the sequence
eval/submit). This function cannot be carried out for
TensorTypes::eStaging.)doc";
TensorTypes::eHost.)doc";
static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc()doc";
@ -570,7 +570,7 @@ queues @param device Vulkan logical device for passing to Algorithm
static const char *__doc_kp_OpTensorSyncLocal_init =
R"doc(Performs basic checks such as ensuring that there is at least one
tensor provided, that they are initialized and that they are not of
type TensorTpes::eStaging.)doc";
type TensorTpes::eHost.)doc";
static const char *__doc_kp_OpTensorSyncLocal_mStagingTensors = R"doc()doc";
@ -719,7 +719,7 @@ shader storage).)doc";
static const char *__doc_kp_Tensor_TensorTypes_eDevice = R"doc(< Type is device memory, source and destination)doc";
static const char *__doc_kp_Tensor_TensorTypes_eStaging = R"doc(< Type is host memory, source and destination)doc";
static const char *__doc_kp_Tensor_TensorTypes_eHost = R"doc(< Type is host memory, source and destination)doc";
static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc";

View file

@ -26,7 +26,7 @@ PYBIND11_MODULE(kp, m) {
py::enum_<kp::Tensor::TensorTypes>(m, "TensorTypes", DOC(kp, Tensor, TensorTypes))
.value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.")
.value("staging", kp::Tensor::TensorTypes::eStaging, "Tensor used for transfer of data to device.")
.value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.")
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
.export_values();
@ -112,7 +112,7 @@ PYBIND11_MODULE(kp, m) {
.def("record_tensor_sync_device", &kp::Sequence::record<kp::OpTensorSyncDevice>,
"Records operation to sync tensor from local memory to GPU memory")
.def("record_tensor_sync_local", &kp::Sequence::record<kp::OpTensorSyncLocal>,
"Records operation to sync tensor(s) from GPU memory to local memory using staging tensors")
"Records operation to sync tensor(s) from GPU memory to local memory")
.def("record_algo_mult", &kp::Sequence::record<kp::OpMult>,
"Records operation to run multiplication compute shader to two input tensors and an output tensor")
.def("record_algo_file", [](kp::Sequence &self,
@ -179,7 +179,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncDevice>,
"Evaluates operation to sync tensor from local memory to GPU memory with new anonymous Sequence")
.def("eval_tensor_sync_local_def", &kp::Manager::evalOpDefault<kp::OpTensorSyncLocal>,
"Evaluates operation to sync tensor(s) from GPU memory to local memory using staging tensors with new anonymous Sequence")
"Evaluates operation to sync tensor(s) from GPU memory to local memory with new anonymous Sequence")
.def("eval_algo_mult_def", &kp::Manager::evalOpDefault<kp::OpMult>,
"Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with new anonymous Sequence")
.def("eval_algo_file_def", &kp::Manager::evalOpDefault<kp::OpAlgoBase, std::string>,
@ -216,7 +216,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_tensor_sync_device", &kp::Manager::evalOp<kp::OpTensorSyncDevice>,
"Evaluates operation to sync tensor from local memory to GPU memory with explicitly named Sequence")
.def("eval_tensor_sync_local", &kp::Manager::evalOp<kp::OpTensorSyncLocal>,
"Evaluates operation to sync tensor(s) from GPU memory to local memory using staging tensors with explicitly named Sequence")
"Evaluates operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
.def("eval_algo_mult", &kp::Manager::evalOp<kp::OpMult>,
"Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence")
.def("eval_algo_file", &kp::Manager::evalOp<kp::OpAlgoBase, std::string>,
@ -256,7 +256,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncDevice>,
"Evaluates asynchronously operation to sync tensor from local memory to GPU memory with anonymous Sequence")
.def("eval_async_tensor_sync_local_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorSyncLocal>,
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory using staging tensors with anonymous Sequence")
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with anonymous Sequence")
.def("eval_async_algo_mult_def", &kp::Manager::evalOpAsyncDefault<kp::OpMult>,
"Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with anonymous Sequence")
.def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoBase, std::string>,
@ -293,7 +293,7 @@ PYBIND11_MODULE(kp, m) {
.def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync<kp::OpTensorSyncDevice>,
"Evaluates asynchronously operation to sync tensor from local memory to GPU memory with explicitly named Sequence")
.def("eval_async_tensor_sync_local", &kp::Manager::evalOpAsync<kp::OpTensorSyncLocal>,
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory using staging tensors with explicitly named Sequence")
"Evaluates asynchronously operation to sync tensor(s) from GPU memory to local memory with explicitly named Sequence")
.def("eval_async_algo_mult", &kp::Manager::evalOpAsync<kp::OpMult>,
"Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence")
.def("eval_async_algo_file", &kp::Manager::evalOpAsync<kp::OpAlgoBase, std::string>,

View file

@ -723,7 +723,7 @@ class Tensor
enum class TensorTypes
{
eDevice = 0, ///< Type is device memory, source and destination
eStaging = 1, ///< Type is host memory, source and destination
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
@ -828,6 +828,26 @@ class Tensor
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier);
/**
* Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Must be passed explicitly (the declaration provides no default value).
*/
void recordCopyFromStagingToDevice(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Must be passed explicitly (the declaration provides no default value).
*/
void recordCopyFromDeviceToStaging(std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
* ensures that relevant data transfers are carried out correctly.
@ -870,10 +890,14 @@ class Tensor
std::shared_ptr<vk::Device> mDevice;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Buffer> mBuffer;
bool mFreeBuffer;
std::shared_ptr<vk::DeviceMemory> mMemory;
bool mFreeMemory;
std::shared_ptr<vk::Buffer> mPrimaryBuffer;
bool mFreePrimaryBuffer = false;
std::shared_ptr<vk::Buffer> mStagingBuffer;
bool mFreeStagingBuffer = false;
std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
bool mFreePrimaryMemory = false;
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<float> mData;
@ -883,11 +907,16 @@ class Tensor
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
bool mIsInit = false;
void createBuffer(); // Creates the vulkan buffer
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer, vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer, std::shared_ptr<vk::DeviceMemory> memory, vk::MemoryPropertyFlags memoryPropertyFlags);
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer, std::shared_ptr<vk::Buffer> bufferFrom, std::shared_ptr<vk::Buffer> bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier);
// Private util functions
vk::BufferUsageFlags getBufferUsageFlags();
vk::MemoryPropertyFlags getMemoryPropertyFlags();
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
uint64_t memorySize();
};
@ -958,7 +987,7 @@ class OpBase
if (tensor && tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
} else {
SPDLOG_ERROR("Kompute OpBase expected to free "
SPDLOG_WARN("Kompute OpBase expected to free "
"tensor but has already been freed.");
}
}
@ -1264,8 +1293,6 @@ class OpTensorCreate : public OpBase
virtual void postEval() override;
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp
@ -1836,9 +1863,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Tensor> mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor
};
} // End namespace kp
@ -1976,7 +2000,7 @@ class OpTensorCopy : public OpBase
namespace kp {
/**
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the record (as opposed to during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs the tensor's device memory by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory, which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
*/
class OpTensorSyncDevice : public OpBase
{
@ -2002,12 +2026,12 @@ class OpTensorSyncDevice : public OpBase
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging. For staging tensors in host memory, the map is performed during the init function.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
void record() override;
@ -2022,8 +2046,6 @@ class OpTensorSyncDevice : public OpBase
virtual void postEval() override;
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp
@ -2031,7 +2053,7 @@ class OpTensorSyncDevice : public OpBase
namespace kp {
/**
Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs the tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation to copy the data from GPU memory into the tensor's staging memory, which means that the operation will be done in sync with GPU commands; the staging memory is then mapped into the local data during postEval. For TensorTypes::eHost it will only map the data from host memory into the local data, which will happen during postEval. This operation won't have any effect on TensorTypes::eStorage.
*/
class OpTensorSyncLocal : public OpBase
{
@ -2052,17 +2074,17 @@ class OpTensorSyncLocal : public OpBase
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command into the staging tensor from the device tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
*/
void record() override;
@ -2077,8 +2099,6 @@ class OpTensorSyncLocal : public OpBase
virtual void postEval() override;
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -65,11 +65,6 @@ OpAlgoLhsRhsOut::init()
" Output: " + std::to_string(this->mTensorOutput->size()));
}
this->mTensorOutputStaging = std::make_shared<Tensor>(
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
this->mTensorOutputStaging->init(this->mPhysicalDevice, this->mDevice);
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
@ -110,8 +105,10 @@ OpAlgoLhsRhsOut::record()
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer);
this->mTensorOutputStaging->recordCopyFrom(
this->mCommandBuffer, this->mTensorOutput, true);
if (this->mTensorOutput->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensorOutput->recordCopyFromDeviceToStaging(this->mCommandBuffer,
true);
}
}
void
@ -119,9 +116,7 @@ OpAlgoLhsRhsOut::postEval()
{
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
this->mTensorOutputStaging->mapDataFromHostMemory();
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
this->mTensorOutput->mapDataFromHostMemory();
}
}

View file

@ -23,16 +23,6 @@ OpTensorCreate::OpTensorCreate(
OpTensorCreate::~OpTensorCreate()
{
SPDLOG_DEBUG("Kompute OpTensorCreate destructor started");
SPDLOG_DEBUG("Kompute OpTensorCreate freeing staging tensors");
for (std::shared_ptr<Tensor> tensor : this->mStagingTensors) {
if (tensor && tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
} else {
SPDLOG_ERROR("Kompute OpTensorCreate expected to free "
"tensor but has already been freed.");
}
}
}
void
@ -50,27 +40,10 @@ OpTensorCreate::init()
throw std::runtime_error(
"Kompute OpTensorCreate: Tensor has already been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
tensor->init(this->mPhysicalDevice, this->mDevice);
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
tensor->data(), Tensor::TensorTypes::eStaging);
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
stagingTensor->mapDataIntoHostMemory();
this->mStagingTensors.push_back(stagingTensor);
} else {
if (tensor->tensorType() != Tensor::TensorTypes::eStorage) {
tensor->init(this->mPhysicalDevice, this->mDevice);
tensor->mapDataIntoHostMemory();
// We push a nullptr when no staging tensor is needed to match
// index number in array to have one to one mapping with tensors
this->mStagingTensors.push_back(nullptr);
}
}
}
@ -82,8 +55,8 @@ OpTensorCreate::record()
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFrom(
this->mCommandBuffer, this->mStagingTensors[i], false);
this->mTensors[i]->recordCopyFromStagingToDevice(
this->mCommandBuffer, false);
}
}
}

View file

@ -41,25 +41,11 @@ OpTensorSyncDevice::init()
"has not been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
throw std::runtime_error(
SPDLOG_WARN(
"Kompute OpTensorSyncLocal tensor parameter is of type "
"TensorTypes::eStorage and hence cannot be used to receive or "
"pass data.");
}
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
tensor->data(), Tensor::TensorTypes::eStaging);
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
this->mStagingTensors.push_back(stagingTensor);
} else {
// We push a nullptr when no staging tensor is needed to match
// index number in array to have one to one mapping with tensors
this->mStagingTensors.push_back(nullptr);
}
}
}
@ -70,8 +56,8 @@ OpTensorSyncDevice::record()
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFrom(
this->mCommandBuffer, this->mStagingTensors[i], false);
this->mTensors[i]->recordCopyFromStagingToDevice(
this->mCommandBuffer, false);
}
}
}
@ -83,11 +69,8 @@ OpTensorSyncDevice::preEval()
// Performing sync of data as eval can be called multiple times with same op
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->setData(this->mTensors[i]->data());
this->mStagingTensors[i]->mapDataIntoHostMemory();
} else {
this->mTensors[i]->mapDataFromHostMemory();
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
this->mTensors[i]->mapDataIntoHostMemory();
}
}
}

View file

@ -41,26 +41,11 @@ OpTensorSyncLocal::init()
"Kompute OpTensorSyncLocal: Tensor has not been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
throw std::runtime_error(
SPDLOG_WARN(
"Kompute OpTensorSyncLocal tensor parameter is of type "
"TensorTypes::eStorage and hence cannot be used to receive or "
"pass data.");
}
if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
std::shared_ptr<Tensor> stagingTensor = std::make_shared<Tensor>(
tensor->data(), Tensor::TensorTypes::eStaging);
stagingTensor->init(this->mPhysicalDevice, this->mDevice);
this->mStagingTensors.push_back(stagingTensor);
} else {
// We push a nullptr when no staging tensor is needed to match
// index number in array to have one to one mapping with tensors
this->mStagingTensors.push_back(nullptr);
}
}
}
@ -71,8 +56,8 @@ OpTensorSyncLocal::record()
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->recordCopyFrom(
this->mCommandBuffer, this->mTensors[i], true);
this->mTensors[i]->recordCopyFromDeviceToStaging(
this->mCommandBuffer, true);
}
}
}
@ -90,10 +75,7 @@ OpTensorSyncLocal::postEval()
SPDLOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->mapDataFromHostMemory();
this->mTensors[i]->setData(this->mStagingTensors[i]->data());
} else {
if (this->mTensors[i]->tensorType() != Tensor::TensorTypes::eStorage) {
this->mTensors[i]->mapDataFromHostMemory();
}
}

View file

@ -47,7 +47,7 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
this->mIsInit = true;
this->createBuffer();
this->allocateMemoryCreateGPUResources();
}
std::vector<float>&
@ -89,7 +89,7 @@ Tensor::tensorType()
bool
Tensor::isInit()
{
return this->mIsInit && this->mBuffer && this->mMemory;
return this->mIsInit && this->mPrimaryBuffer && this->mPrimaryMemory;
}
void
@ -107,20 +107,71 @@ Tensor::recordCopyFrom(std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier)
{
SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");
if (!this->mIsInit || !copyFromTensor->mIsInit) {
throw std::runtime_error(
"Kompute Tensor attempted to run createBuffer without init");
}
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
SPDLOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);
this->copyBuffer(commandBuffer,
copyFromTensor->mPrimaryBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
}
void
Tensor::recordCopyFromStagingToDevice(
std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier)
{
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);
SPDLOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
commandBuffer->copyBuffer(
*copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
this->copyBuffer(commandBuffer,
this->mStagingBuffer,
this->mPrimaryBuffer,
bufferSize,
copyRegion,
createBarrier);
}
// Records a whole-buffer copy from this tensor's primary buffer into its
// staging buffer on the given command buffer. The region starts at offset 0
// on both sides and spans memorySize() bytes; createBarrier is forwarded
// unchanged to copyBuffer.
void
Tensor::recordCopyFromDeviceToStaging(
  std::shared_ptr<vk::CommandBuffer> commandBuffer,
  bool createBarrier)
{
    const vk::DeviceSize byteCount(this->memorySize());
    // Source offset 0, destination offset 0, full tensor size.
    const vk::BufferCopy wholeBufferRegion(0, 0, byteCount);

    SPDLOG_DEBUG("Kompute Tensor copying data size {}.", byteCount);

    this->copyBuffer(commandBuffer,
                     this->mPrimaryBuffer, // copy source
                     this->mStagingBuffer, // copy destination
                     byteCount,
                     wholeBufferRegion,
                     createBarrier);
}
void
Tensor::copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier)
{
if (!this->mIsInit) {
throw std::runtime_error(
"Kompute Tensor attempted to run copyBuffer without init");
}
commandBuffer->copyBuffer(*bufferFrom, *bufferTo, copyRegion);
if (createBarrier) {
// Barrier to ensure wait until data is copied to the destination buffer
@ -145,7 +196,7 @@ Tensor::recordBufferMemoryBarrier(
vk::DeviceSize bufferSize = this->memorySize();
vk::BufferMemoryBarrier bufferMemoryBarrier;
bufferMemoryBarrier.buffer = *this->mBuffer;
bufferMemoryBarrier.buffer = *this->mPrimaryBuffer;
bufferMemoryBarrier.size = bufferSize;
bufferMemoryBarrier.srcAccessMask = srcAccessMask;
bufferMemoryBarrier.dstAccessMask = dstAccessMask;
@ -164,7 +215,7 @@ vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
vk::DeviceSize bufferSize = this->memorySize();
return vk::DescriptorBufferInfo(*this->mBuffer,
return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
0, // offset
bufferSize);
}
@ -174,20 +225,21 @@ Tensor::mapDataFromHostMemory()
{
SPDLOG_DEBUG("Kompute Tensor mapping data from host buffer");
if (this->mTensorType != TensorTypes::eStaging) {
SPDLOG_ERROR(
"Mapping tensor data manually from DEVICE buffer instead of "
"using record GPU command with staging buffer");
return;
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else {
hostVisibleMemory = this->mStagingMemory;
}
vk::DeviceSize bufferSize = this->memorySize();
void* mapped = this->mDevice->mapMemory(
*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(*this->mMemory, 0, bufferSize);
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
memcpy(this->mData.data(), mapped, bufferSize);
this->mDevice->unmapMemory(*this->mMemory);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
void
@ -196,24 +248,26 @@ Tensor::mapDataIntoHostMemory()
SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
if (this->mTensorType != TensorTypes::eStaging) {
SPDLOG_ERROR("Mapping tensor data manually to DEVICE memory instead of "
"using record GPU command with staging buffer");
return;
std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else {
hostVisibleMemory = this->mStagingMemory;
}
vk::DeviceSize bufferSize = this->memorySize();
void* mapped = this->mDevice->mapMemory(
*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
memcpy(mapped, this->mData.data(), bufferSize);
vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*this->mMemory);
this->mDevice->unmapMemory(*hostVisibleMemory);
}
vk::BufferUsageFlags
Tensor::getBufferUsageFlags()
Tensor::getPrimaryBufferUsageFlags()
{
switch (this->mTensorType) {
case TensorTypes::eDevice:
@ -221,8 +275,9 @@ Tensor::getBufferUsageFlags()
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst;
break;
case TensorTypes::eStaging:
return vk::BufferUsageFlagBits::eTransferSrc |
case TensorTypes::eHost:
return vk::BufferUsageFlagBits::eStorageBuffer |
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst;
break;
case TensorTypes::eStorage:
@ -234,13 +289,13 @@ Tensor::getBufferUsageFlags()
}
vk::MemoryPropertyFlags
Tensor::getMemoryPropertyFlags()
Tensor::getPrimaryMemoryPropertyFlags()
{
switch (this->mTensorType) {
case TensorTypes::eDevice:
return vk::MemoryPropertyFlagBits::eDeviceLocal;
break;
case TensorTypes::eStaging:
case TensorTypes::eHost:
return vk::MemoryPropertyFlagBits::eHostVisible;
break;
case TensorTypes::eStorage:
@ -251,8 +306,33 @@ Tensor::getMemoryPropertyFlags()
}
}
// Returns the usage flags for the staging buffer: transfer source and
// transfer destination. Throws std::runtime_error for any tensor type other
// than TensorTypes::eDevice.
vk::BufferUsageFlags
Tensor::getStagingBufferUsageFlags()
{
    if (this->mTensorType != TensorTypes::eDevice) {
        throw std::runtime_error("Kompute Tensor invalid tensor type");
    }

    return vk::BufferUsageFlagBits::eTransferSrc |
           vk::BufferUsageFlagBits::eTransferDst;
}
// Returns the memory property flags for the staging memory: host visible.
// Throws std::runtime_error for any tensor type other than
// TensorTypes::eDevice.
vk::MemoryPropertyFlags
Tensor::getStagingMemoryPropertyFlags()
{
    if (this->mTensorType != TensorTypes::eDevice) {
        throw std::runtime_error("Kompute Tensor invalid tensor type");
    }

    return vk::MemoryPropertyFlagBits::eHostVisible;
}
void
Tensor::createBuffer()
Tensor::allocateMemoryCreateGPUResources()
{
SPDLOG_DEBUG("Kompute Tensor creating buffer");
@ -268,43 +348,79 @@ Tensor::createBuffer()
throw std::runtime_error("Kompute Tensor device is null");
}
SPDLOG_DEBUG("Kompute Tensor creating primary buffer and memory");
vk::BufferUsageFlags usageFlags = this->getBufferUsageFlags();
vk::DeviceSize bufferSize = this->memorySize();
if(bufferSize<1){
throw std::runtime_error("Kompute Tensor attempted to create a zero-sized buffer");
this->mPrimaryBuffer = std::make_shared<vk::Buffer>();
this->createBuffer(this->mPrimaryBuffer,
this->getPrimaryBufferUsageFlags());
this->mFreePrimaryBuffer = true;
this->mPrimaryMemory = std::make_shared<vk::DeviceMemory>();
this->allocateBindMemory(this->mPrimaryBuffer,
this->mPrimaryMemory,
this->getPrimaryMemoryPropertyFlags());
this->mFreePrimaryMemory = true;
if (this->mTensorType == TensorTypes::eDevice) {
SPDLOG_DEBUG("Kompute Tensor creating staging buffer and memory");
this->mStagingBuffer = std::make_shared<vk::Buffer>();
this->createBuffer(this->mStagingBuffer,
this->getStagingBufferUsageFlags());
this->mFreeStagingBuffer = true;
this->mStagingMemory = std::make_shared<vk::DeviceMemory>();
this->allocateBindMemory(this->mStagingBuffer,
this->mStagingMemory,
this->getStagingMemoryPropertyFlags());
this->mFreeStagingMemory = true;
}
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
}
void
Tensor::createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags)
{
vk::DeviceSize bufferSize = this->memorySize();
if (bufferSize < 1) {
throw std::runtime_error(
"Kompute Tensor attempted to create a zero-sized buffer");
}
this->mFreeBuffer = true;
SPDLOG_DEBUG("Kompute Tensor creating buffer with memory size: {}, and "
"usage flags: {}",
bufferSize,
vk::to_string(usageFlags));
vk::to_string(bufferUsageFlags));
// TODO: Explore having concurrent sharing mode (with option)
vk::BufferCreateInfo bufferInfo(vk::BufferCreateFlags(),
bufferSize,
usageFlags,
bufferUsageFlags,
vk::SharingMode::eExclusive);
this->mBuffer = std::make_shared<vk::Buffer>();
this->mDevice->createBuffer(&bufferInfo, nullptr, this->mBuffer.get());
this->mDevice->createBuffer(&bufferInfo, nullptr, buffer.get());
}
SPDLOG_DEBUG("Kompute Tensor buffer created now creating memory");
void
Tensor::allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags)
{
SPDLOG_DEBUG("Kompute Tensor allocating and binding memory");
vk::PhysicalDeviceMemoryProperties memoryProperties =
this->mPhysicalDevice->getMemoryProperties();
vk::MemoryRequirements memoryRequirements =
this->mDevice->getBufferMemoryRequirements(*this->mBuffer);
vk::MemoryPropertyFlags memoryPropertyFlags =
this->getMemoryPropertyFlags();
this->mDevice->getBufferMemoryRequirements(*buffer);
uint32_t memoryTypeIndex = -1;
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) {
if (memoryRequirements.memoryTypeBits & (1 << i)) {
if ((memoryProperties.memoryTypes[i].propertyFlags &
if (((memoryProperties.memoryTypes[i]).propertyFlags &
memoryPropertyFlags) == memoryPropertyFlags) {
memoryTypeIndex = i;
break;
@ -316,8 +432,6 @@ Tensor::createBuffer()
"Memory type index for buffer creation not found");
}
this->mFreeMemory = true;
SPDLOG_DEBUG(
"Kompute Tensor allocating memory index: {}, size {}, flags: {}",
memoryTypeIndex,
@ -327,13 +441,9 @@ Tensor::createBuffer()
vk::MemoryAllocateInfo memoryAllocateInfo(memoryRequirements.size,
memoryTypeIndex);
this->mMemory = std::make_shared<vk::DeviceMemory>();
this->mDevice->allocateMemory(
&memoryAllocateInfo, nullptr, this->mMemory.get());
this->mDevice->allocateMemory(&memoryAllocateInfo, nullptr, memory.get());
this->mDevice->bindBufferMemory(*this->mBuffer, *this->mMemory, 0);
SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
this->mDevice->bindBufferMemory(*buffer, *memory, 0);
}
void
@ -349,29 +459,55 @@ Tensor::freeMemoryDestroyGPUResources()
return;
}
if (this->mFreeBuffer) {
if (!this->mBuffer) {
SPDLOG_ERROR(
"Kompose Tensor expected to free buffer but got null buffer");
if (this->mFreePrimaryBuffer) {
if (!this->mPrimaryBuffer) {
SPDLOG_ERROR("Kompose Tensor expected to destroy primary buffer "
"but got null buffer");
} else {
SPDLOG_DEBUG("Kompose Tensor destroying buffer");
SPDLOG_DEBUG("Kompose Tensor destroying primary buffer");
this->mDevice->destroy(
*this->mBuffer,
*this->mPrimaryBuffer,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mBuffer = nullptr;
this->mPrimaryBuffer = nullptr;
}
}
if (this->mFreeMemory) {
if (!this->mMemory) {
SPDLOG_ERROR(
"Kompose Tensor expected to free buffer but got null memory");
if (this->mFreeStagingBuffer) {
if (!this->mStagingBuffer) {
SPDLOG_ERROR("Kompose Tensor expected to destroy staging buffer "
"but got null buffer");
} else {
SPDLOG_DEBUG("Kompose Tensor freeing memory");
this->mDevice->freeMemory(
*this->mMemory,
SPDLOG_DEBUG("Kompose Tensor destroying staging buffer");
this->mDevice->destroy(
*this->mStagingBuffer,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mDevice = nullptr;
this->mStagingBuffer = nullptr;
}
}
if (this->mFreePrimaryMemory) {
if (!this->mPrimaryMemory) {
SPDLOG_ERROR("Kompose Tensor expected to free primary memory but "
"got null memory");
} else {
SPDLOG_DEBUG("Kompose Tensor freeing primary memory");
this->mDevice->freeMemory(
*this->mPrimaryMemory,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPrimaryMemory = nullptr;
}
}
if (this->mFreeStagingMemory) {
if (!this->mStagingMemory) {
SPDLOG_ERROR("Kompose Tensor expected to free staging memory but "
"got null memory");
} else {
SPDLOG_DEBUG("Kompose Tensor freeing staging memory");
this->mDevice->freeMemory(
*this->mStagingMemory,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mStagingMemory = nullptr;
}
}

View file

@ -26,7 +26,7 @@ class Tensor
enum class TensorTypes
{
eDevice = 0, ///< Type is device memory, source and destination
eStaging = 1, ///< Type is host memory, source and destination
eHost = 1, ///< Type is host memory, source and destination
eStorage = 2, ///< Type is Device memory (only)
};
@ -39,7 +39,8 @@ class Tensor
* Default constructor with data provided which would be used to create the
* respective vulkan buffer and memory.
*
* @param data Non-zero-sized vector of data that will be used by the tensor
* @param data Non-zero-sized vector of data that will be used by the
* tensor
* @param tensorType Type for the tensor which is of type TensorTypes
*/
Tensor(const std::vector<float>& data,
@ -131,6 +132,32 @@ class Tensor
std::shared_ptr<Tensor> copyFromTensor,
bool createBarrier);
/**
* Records a copy from the tensor's internal staging memory to its device
* memory, optionally followed by a barrier that waits for the copy to
* complete. This function is only relevant for kp::Tensors of type
* TensorTypes::eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. This declaration provides no default
* value, so callers must pass it explicitly.
*/
void recordCopyFromStagingToDevice(
std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records a copy from the tensor's internal device memory to its staging
* memory, optionally followed by a barrier that waits for the copy to
* complete. This function is only relevant for kp::Tensors of type
* TensorTypes::eDevice.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. This declaration provides no default
* value, so callers must pass it explicitly.
*/
void recordCopyFromDeviceToStaging(
std::shared_ptr<vk::CommandBuffer> commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
* ensures that relevant data transfers are carried out correctly.
@ -173,10 +200,14 @@ class Tensor
std::shared_ptr<vk::Device> mDevice;
// -------------- OPTIONALLY OWNED RESOURCES
std::shared_ptr<vk::Buffer> mBuffer;
bool mFreeBuffer;
std::shared_ptr<vk::DeviceMemory> mMemory;
bool mFreeMemory;
std::shared_ptr<vk::Buffer> mPrimaryBuffer;
bool mFreePrimaryBuffer = false;
std::shared_ptr<vk::Buffer> mStagingBuffer;
bool mFreeStagingBuffer = false;
std::shared_ptr<vk::DeviceMemory> mPrimaryMemory;
bool mFreePrimaryMemory = false;
std::shared_ptr<vk::DeviceMemory> mStagingMemory;
bool mFreeStagingMemory = false;
// -------------- ALWAYS OWNED RESOURCES
std::vector<float> mData;
@ -186,11 +217,24 @@ class Tensor
std::array<uint32_t, KP_MAX_DIM_SIZE> mShape;
bool mIsInit = false;
void createBuffer(); // Creates the vulkan buffer
void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer
void createBuffer(std::shared_ptr<vk::Buffer> buffer,
vk::BufferUsageFlags bufferUsageFlags);
void allocateBindMemory(std::shared_ptr<vk::Buffer> buffer,
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags);
void copyBuffer(std::shared_ptr<vk::CommandBuffer> commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
// Private util functions
vk::BufferUsageFlags getBufferUsageFlags();
vk::MemoryPropertyFlags getMemoryPropertyFlags();
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
uint64_t memorySize();
};

View file

@ -78,9 +78,6 @@ class OpAlgoLhsRhsOut : public OpAlgoBase
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Tensor> mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor
};
} // End namespace kp

View file

@ -69,7 +69,7 @@ class OpBase
if (tensor && tensor->isInit()) {
tensor->freeMemoryDestroyGPUResources();
} else {
SPDLOG_ERROR("Kompute OpBase expected to free "
SPDLOG_WARN("Kompute OpBase expected to free "
"tensor but has already been freed.");
}
}

View file

@ -69,8 +69,6 @@ class OpTensorCreate : public OpBase
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -9,7 +9,7 @@
namespace kp {
/**
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the record (as opposed to during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
*/
class OpTensorSyncDevice : public OpBase
{
@ -35,12 +35,12 @@ class OpTensorSyncDevice : public OpBase
~OpTensorSyncDevice() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging. For staging tensors in host memory, the map is performed during the init function.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command to the device tensor from the temporary staging tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory.
*/
void record() override;
@ -55,8 +55,6 @@ class OpTensorSyncDevice : public OpBase
virtual void postEval() override;
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -9,7 +9,7 @@
namespace kp {
/**
Operation that syncs tensor's local data by mapping the data from device memory into the local vector. For TensorTypes::eDevice it will use a staging tensor to perform the copy. For TensorTypes::eStaging it will only copy the data and perform a map, which will be executed during the postSubmit (there will be no copy during the sequence eval/submit). This function cannot be carried out for TensorTypes::eStaging.
Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be synced from GPU memory into the tensor's staging memory, which means that the operation will be done in sync with GPU commands. For TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStorage.
*/
class OpTensorSyncLocal : public OpBase
{
@ -30,17 +30,17 @@ class OpTensorSyncLocal : public OpBase
std::vector<std::shared_ptr<Tensor>> tensors);
/**
* Default destructor. This class manages the memory of the staging tensors it owns but these are released in the postSubmit, before it arrives to the destructor.
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* Performs basic checks such as ensuring that there is at least one tensor provided, that they are initialized and that they are not of type TensorTpes::eStaging.
* Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element.
*/
void init() override;
/**
* For device tensors, it records the copy command into the staging tensor from the device tensor.
* For device tensors, it records the copy command for the tensor to copy the data from its device to staging memory.
*/
void record() override;
@ -56,8 +56,6 @@ class OpTensorSyncLocal : public OpBase
private:
// Never owned resources
std::vector<std::shared_ptr<Tensor>> mStagingTensors;
};
} // End namespace kp

View file

@ -105,12 +105,12 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy)
std::shared_ptr<kp::Tensor> y{ new kp::Tensor({ 0, 0, 0, 1, 1 }) };
std::shared_ptr<kp::Tensor> wIn{ new kp::Tensor(
wInVec, kp::Tensor::TensorTypes::eStaging) };
wInVec, kp::Tensor::TensorTypes::eHost) };
std::shared_ptr<kp::Tensor> wOutI{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
std::shared_ptr<kp::Tensor> wOutJ{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
std::shared_ptr<kp::Tensor> bIn{ new kp::Tensor(
bInVec, kp::Tensor::TensorTypes::eStaging) };
bInVec, kp::Tensor::TensorTypes::eHost) };
std::shared_ptr<kp::Tensor> bOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };
std::shared_ptr<kp::Tensor> lOut{ new kp::Tensor({ 0, 0, 0, 0, 0 }) };

View file

@ -126,7 +126,7 @@ TEST(TestManager, TestCreateInitTensor)
EXPECT_EQ(tensorB->data(), std::vector<float>({ 0, 1, 2 }));
std::shared_ptr<kp::Tensor> tensorC =
mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eStaging);
mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost);
mgr.evalOpDefault<kp::OpTensorCopy>({ tensorA, tensorC });

View file

@ -58,7 +58,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti)
EXPECT_EQ(tensorA->data(), tensorC->data());
}
TEST(TestOpTensorCopy, CopyDeviceToStagingTensor)
TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
{
kp::Manager mgr;
@ -68,7 +68,7 @@ TEST(TestOpTensorCopy, CopyDeviceToStagingTensor)
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(testVecA) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
testVecB, kp::Tensor::TensorTypes::eStaging) };
testVecB, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
@ -84,7 +84,7 @@ TEST(TestOpTensorCopy, CopyDeviceToStagingTensor)
EXPECT_EQ(tensorA->data(), tensorB->data());
}
TEST(TestOpTensorCopy, CopyStagingToDeviceTensor)
TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
{
kp::Manager mgr;
@ -93,7 +93,7 @@ TEST(TestOpTensorCopy, CopyStagingToDeviceTensor)
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
testVecA, kp::Tensor::TensorTypes::eStaging) };
testVecA, kp::Tensor::TensorTypes::eHost) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(testVecB) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
@ -110,7 +110,7 @@ TEST(TestOpTensorCopy, CopyStagingToDeviceTensor)
EXPECT_EQ(tensorA->data(), tensorB->data());
}
TEST(TestOpTensorCopy, CopyStagingToStagingTensor)
TEST(TestOpTensorCopy, CopyHostToHostTensor)
{
kp::Manager mgr;
@ -119,9 +119,9 @@ TEST(TestOpTensorCopy, CopyStagingToStagingTensor)
std::vector<float> testVecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
testVecA, kp::Tensor::TensorTypes::eStaging) };
testVecA, kp::Tensor::TensorTypes::eHost) };
std::shared_ptr<kp::Tensor> tensorB{ new kp::Tensor(
testVecB, kp::Tensor::TensorTypes::eStaging) };
testVecB, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA, tensorB });
@ -145,7 +145,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
std::vector<float> testVecA{ 9, 8, 7 };
std::shared_ptr<kp::Tensor> tensorA{ new kp::Tensor(
testVecA, kp::Tensor::TensorTypes::eStaging) };
testVecA, kp::Tensor::TensorTypes::eHost) };
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });

View file

@ -114,7 +114,6 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore)
EXPECT_FALSE(tensorB->isInit());
}
TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
{
std::vector<float> testVecA;
@ -123,11 +122,11 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor)
kp::Manager mgr;
try{
try {
mgr.evalOpDefault<kp::OpTensorCreate>({ tensorA });
} catch( const std::runtime_error& err ) {
// check exception
ASSERT_TRUE( std::string(err.what()).find("zero-sized") != std::string::npos );
} catch (const std::runtime_error& err) {
// check exception
ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
std::string::npos);
}
}

View file

@ -17,9 +17,9 @@ TEST(TestTensor, CopyFromHostData)
std::vector<float> vecB{ 0, 0, 0 };
std::shared_ptr<kp::Tensor> tensorA =
std::make_shared<kp::Tensor>(vecA, kp::Tensor::TensorTypes::eStaging);
std::make_shared<kp::Tensor>(vecA, kp::Tensor::TensorTypes::eHost);
std::shared_ptr<kp::Tensor> tensorB =
std::make_shared<kp::Tensor>(vecB, kp::Tensor::TensorTypes::eStaging);
std::make_shared<kp::Tensor>(vecB, kp::Tensor::TensorTypes::eHost);
kp::Manager mgr;