Added functional optensorsyncDevice and optensorsynclocal

This commit is contained in:
Alejandro Saucedo 2020-09-06 15:51:31 +01:00
parent e6f4097acb
commit e68d09dbdc
15 changed files with 258 additions and 50 deletions

View file

@ -121,7 +121,11 @@ Now that we have the inputs and outputs we will be able to use them in the proce
Once we re-record, all the instructions that were recorded previously are cleared.
Because of this we can record now the new command which is just the OpAlgoBase with the LR shader.
Because of this we can now record the new commands, which consist of the following:
1. Copy the tensor data from local to device
2. Run the logistic regression shader
3. Copy the output data
.. code-block:: cpp
:linenos:
@ -131,11 +135,15 @@ Because of this we can record now the new command which is just the OpAlgoBase w
sq->begin();
sq->record<kp::OpTensorSyncDevice>({wIn, bIn});
sq->record<kp::OpAlgoBase<>>(
params,
true, // Whether to copy output from device
false, // Whether to copy output from device
"test/shaders/glsl/test_logistic_regression.comp");
sq->record<kp::OpTensorSyncLocal>({wOutI, wOutJ, bOut});
sq->end();
4. Loop across number of iterations + 4-a. Submit algo operation on LR shader

View file

@ -470,11 +470,24 @@ class OpBase
virtual void record() = 0;
/**
* Post submit is called after the Sequence has submitted the commands to
* the GPU for processing, and can be used to perform any tear-down steps
* required as the computation iteration finishes.
* Pre eval is called at the start of Sequence::eval, before the commands are
* submitted to the GPU for processing, and can be used to perform any
* per-eval setup steps required as the computation iteration begins. It's
* worth noting that eval can be called multiple times on the same recorded
* operations, so any setup performed here should be idempotent: calling it
* multiple times in a row must be safe.
*/
virtual void postSubmit() = 0;
virtual void preEval() = 0;
/**
* Post eval is called at the end of Sequence::eval, after the commands have
* been submitted to the GPU for processing, and can be used to perform any
* tear-down steps required as the computation iteration finishes. It's worth
* noting that eval can be called multiple times, so any resources destroyed
* here should not be needed by a later eval, unless the user explicitly
* re-initialises them.
*/
virtual void postEval() = 0;
protected:
// -------------- NEVER OWNED RESOURCES
@ -966,12 +979,17 @@ class OpAlgoBase : public OpBase
*/
virtual void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postSubmit() override;
virtual void postEval() override;
protected:
// -------------- NEVER OWNED RESOURCES
@ -1162,7 +1180,14 @@ OpAlgoBase<tX, tY, tZ>::record()
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postSubmit()
OpAlgoBase<tX, tY, tZ>::preEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");
@ -1554,11 +1579,16 @@ class OpTensorCreate : public OpBase
*/
void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Does not perform any postEval commands.
*/
void postSubmit() override;
virtual void postEval() override;
private:
// Never owned resources
@ -1605,10 +1635,15 @@ class OpTensorCopy : public OpBase
*/
void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Copies the local vectors for all the tensors to sync the data with the gpu.
*/
void postSubmit() override;
virtual void postEval() override;
private:
};
@ -1654,9 +1689,14 @@ class OpTensorSyncDevice : public OpBase
void record() override;
/**
* Does not perform any further sync functions. Frees the staging tensors together with their respective memory.
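* Re-syncs the tensor data before every eval, as eval can be called multiple
* times with the same op: for device tensors the tensor data is set on the
* staging tensor and mapped into host memory; for any other tensor type
* mapDataFromHostMemory is called on the tensor itself.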
*/
void postSubmit() override;
virtual void preEval() override;
/**
* Does not perform any postEval commands.
*/
virtual void postEval() override;
private:
// Never owned resources
@ -1704,9 +1744,14 @@ class OpTensorSyncLocal : public OpBase
void record() override;
/**
* For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory.
* Does not perform any preEval commands.
*/
void postSubmit() override;
virtual void preEval() override;
/**
* For host tensors it performs the map command from the host memory into local memory.
*/
virtual void postEval() override;
private:
// Never owned resources

View file

@ -57,9 +57,15 @@ OpTensorCopy::record()
}
void
OpTensorCopy::postSubmit()
OpTensorCopy::preEval()
{
SPDLOG_DEBUG("Kompute OpTensorCopy postSubmit called");
SPDLOG_DEBUG("Kompute OpTensorCopy preEval called");
}
void
OpTensorCopy::postEval()
{
SPDLOG_DEBUG("Kompute OpTensorCopy postEval called");
// Copy the data from the first tensor into all the tensors
for (size_t i = 1; i < this->mTensors.size(); i++) {

View file

@ -80,12 +80,15 @@ OpTensorCreate::record()
}
void
OpTensorCreate::postSubmit()
OpTensorCreate::preEval()
{
SPDLOG_DEBUG("Kompute OpTensorCreate postSubmit called");
SPDLOG_DEBUG("Kompute OpTensorCreate preEval called");
}
SPDLOG_DEBUG("Kompute OpTensorCreate destroying staging tensors");
this->mStagingTensors.clear();
/**
 * Post-eval hook for the tensor creation operation. No work is carried out
 * at this stage; the call is only reported through the debug log.
 */
void OpTensorCreate::postEval()
{
    SPDLOG_DEBUG("Kompute OpTensorCreate postEval called");
}
}

View file

@ -36,8 +36,8 @@ OpTensorSyncDevice::init()
}
for (std::shared_ptr<Tensor> tensor: this->mTensors) {
if (tensor->isInit()) {
throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor has already been initialized");
if (!tensor->isInit()) {
throw std::runtime_error("Kompute OpTensorSyncDevice: Tensor param has not been initialized");
}
if (tensor->tensorType() == Tensor::TensorTypes::eStorage) {
throw std::runtime_error("Kompute OpTensorSyncLocal tensor parameter is of type TensorTypes::eStorage and hence cannot be used to receive or pass data.");
@ -78,14 +78,25 @@ OpTensorSyncDevice::record()
}
void
OpTensorSyncDevice::postSubmit()
OpTensorSyncDevice::preEval()
{
SPDLOG_DEBUG("Kompute OpTensorSyncDevice postSubmit called");
SPDLOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
// Remove all staging tensors as they are not required after operation
SPDLOG_DEBUG("Kompute OpTensorSyncDevice destroying staging tensors");
// TODO: This would cause issues if there is no CPU barrier
this->mStagingTensors.clear();
// Performing sync of data as eval can be called multiple times with same op
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->setData(this->mTensors[i]->data());
this->mStagingTensors[i]->mapDataIntoHostMemory();
} else {
this->mTensors[i]->mapDataFromHostMemory();
}
}
}
/**
 * Post-eval hook for the sync-to-device operation. No work is carried out
 * at this stage; the call is only reported through the debug log.
 */
void OpTensorSyncDevice::postEval()
{
    SPDLOG_DEBUG("Kompute OpTensorSyncDevice postEval called");
}
}

View file

@ -74,10 +74,17 @@ OpTensorSyncLocal::record()
}
void
OpTensorSyncLocal::postSubmit()
OpTensorSyncLocal::preEval()
{
SPDLOG_DEBUG("Kompute OpTensorSyncLocal postSubmit called");
SPDLOG_DEBUG("Kompute OpTensorSyncLocal preEval called");
}
void
OpTensorSyncLocal::postEval()
{
SPDLOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
SPDLOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mStagingTensors[i]->mapDataFromHostMemory();
@ -86,10 +93,6 @@ OpTensorSyncLocal::postSubmit()
this->mTensors[i]->mapDataFromHostMemory();
}
}
// Remove all staging tensors as they are not required after operation
SPDLOG_DEBUG("Kompute OpTensorSyncLocal destroying staging tensors");
this->mStagingTensors.clear();
}
}

View file

@ -125,6 +125,10 @@ Sequence::eval()
return false;
}
for (size_t i = 0; i < this->mOperations.size(); i++) {
this->mOperations[i]->preEval();
}
const vk::PipelineStageFlags waitStageMask =
vk::PipelineStageFlagBits::eTransfer;
vk::SubmitInfo submitInfo(
@ -140,7 +144,7 @@ Sequence::eval()
this->mDevice->destroy(fence);
for (size_t i = 0; i < this->mOperations.size(); i++) {
this->mOperations[i]->postSubmit();
this->mOperations[i]->postEval();
}
SPDLOG_DEBUG("Kompute sequence EVAL success");

View file

@ -120,12 +120,18 @@ class OpAlgoBase : public OpBase
*/
virtual void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Executes after the recorded commands are submitted, and performs a copy
* of the GPU Device memory into the staging buffer so the output data can
* be retrieved.
*/
virtual void postSubmit() override;
virtual void postEval() override;
protected:
// -------------- NEVER OWNED RESOURCES
@ -316,7 +322,14 @@ OpAlgoBase<tX, tY, tZ>::record()
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postSubmit()
OpAlgoBase<tX, tY, tZ>::preEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase preEval called");
}
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoBase<tX, tY, tZ>::postEval()
{
SPDLOG_DEBUG("Kompute OpAlgoBase postSubmit called");

View file

@ -90,11 +90,24 @@ class OpBase
virtual void record() = 0;
/**
* Post submit is called after the Sequence has submitted the commands to
* the GPU for processing, and can be used to perform any tear-down steps
* required as the computation iteration finishes.
* Pre eval is called at the start of Sequence::eval, before the commands are
* submitted to the GPU for processing, and can be used to perform any
* per-eval setup steps required as the computation iteration begins. It's
* worth noting that eval can be called multiple times on the same recorded
* operations, so any setup performed here should be idempotent: calling it
* multiple times in a row must be safe.
*/
virtual void postSubmit() = 0;
virtual void preEval() = 0;
/**
* Post eval is called at the end of Sequence::eval, after the commands have
* been submitted to the GPU for processing, and can be used to perform any
* tear-down steps required as the computation iteration finishes. It's worth
* noting that eval can be called multiple times, so any resources destroyed
* here should not be needed by a later eval, unless the user explicitly
* re-initialises them.
*/
virtual void postEval() = 0;
protected:
// -------------- NEVER OWNED RESOURCES

View file

@ -44,10 +44,15 @@ class OpTensorCopy : public OpBase
*/
void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Copies the local vectors for all the tensors to sync the data with the gpu.
*/
void postSubmit() override;
virtual void postEval() override;
private:
};

View file

@ -56,11 +56,17 @@ class OpTensorCreate : public OpBase
*/
void record() override;
/**
* Does not perform any preEval commands.
*/
virtual void preEval() override;
/**
* Does not perform any postEval commands.
*/
void postSubmit() override;
virtual void postEval() override;
private:
// Never owned resources

View file

@ -45,9 +45,14 @@ class OpTensorSyncDevice : public OpBase
void record() override;
/**
* Does not perform any further sync functions. Frees the staging tensors together with their respective memory.
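* Re-syncs the tensor data before every eval, as eval can be called multiple
* times with the same op: for device tensors the tensor data is set on the
* staging tensor and mapped into host memory; for any other tensor type
* mapDataFromHostMemory is called on the tensor itself.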
*/
void postSubmit() override;
virtual void preEval() override;
/**
* Does not perform any postEval commands.
*/
virtual void postEval() override;
private:
// Never owned resources

View file

@ -45,9 +45,15 @@ class OpTensorSyncLocal : public OpBase
void record() override;
/**
* For host tensors it performs the map command from the host memory into local memory. Frees the staging tensors together with their respective memory.
* Does not perform any preEval commands.
*/
void postSubmit() override;
virtual void preEval() override;
/**
* For host tensors it performs the map command from the host memory into local memory.
*/
virtual void postEval() override;
private:
// Never owned resources

View file

@ -1,9 +1,89 @@
#include "gtest/gtest.h"
#include "fmt/ranges.h"
#include "kompute/Kompute.hpp"
TEST(LogisticRegressionAlgorithm, TestMainLogisticRegression) {
/**
 * Runs the logistic regression shader through a single recorded sequence
 * that is evaluated repeatedly: each eval syncs the current weights and bias
 * to the device, runs the shader, and syncs the shader outputs back so the
 * weights and bias can be updated locally before the next eval.
 */
TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) {

    // How many times the recorded sequence is evaluated
    const uint32_t numIterations = 100;

    // Starting values for the two weights and the bias
    std::vector<float> wInVec = { 0.001, 0.001 };
    std::vector<float> bInVec = { 0 };

    // Tensors handed to the shader; the order in `params` below is the
    // order in which they are passed to the operations
    std::shared_ptr<kp::Tensor> xI(new kp::Tensor({ 0, 1, 1, 1, 1 }));
    std::shared_ptr<kp::Tensor> xJ(new kp::Tensor({ 0, 0, 0, 1, 1 }));

    std::shared_ptr<kp::Tensor> y(new kp::Tensor({ 0, 0, 0, 1, 1 }));

    std::shared_ptr<kp::Tensor> wIn(new kp::Tensor(wInVec));
    std::shared_ptr<kp::Tensor> wOutI(new kp::Tensor({ 0, 0, 0, 0, 0 }));
    std::shared_ptr<kp::Tensor> wOutJ(new kp::Tensor({ 0, 0, 0, 0, 0 }));

    std::shared_ptr<kp::Tensor> bIn(new kp::Tensor(bInVec));
    std::shared_ptr<kp::Tensor> bOut(new kp::Tensor({ 0, 0, 0, 0, 0 }));

    std::vector<std::shared_ptr<kp::Tensor>> params{
        xI, xJ, y, wIn, wOutI, wOutJ, bIn, bOut
    };

    {
        kp::Manager mgr;

        // The manager only hands out a weak reference; the body is skipped
        // if the sequence could not be obtained
        std::shared_ptr<kp::Sequence> sq =
          mgr.getOrCreateManagedSequence("createTensors").lock();

        if (sq) {

            // First recording: create all tensors
            sq->begin();
            sq->record<kp::OpTensorCreate>(params);
            sq->end();
            sq->eval();

            // Second recording, reused on every iteration:
            // sync inputs to device -> run shader -> sync outputs to local
            sq->begin();
            sq->record<kp::OpTensorSyncDevice>({wIn, bIn});
            sq->record<kp::OpAlgoBase<>>(
              params,
              false, // Whether to copy output from device
              "test/shaders/glsl/test_logistic_regression.comp");
            sq->record<kp::OpTensorSyncLocal>({wOutI, wOutJ, bOut});
            sq->end();

            // Evaluate the same recorded commands once per iteration
            for (size_t iteration = 0; iteration < numIterations; ++iteration) {

                sq->eval();

                // Subtract every output element from the local weights and
                // bias so the next eval picks up the updated values
                for (size_t sample = 0; sample < bOut->size(); ++sample) {
                    wIn->data()[0] -= wOutI->data()[sample];
                    wIn->data()[1] -= wOutJ->data()[sample];
                    bIn->data()[0] -= bOut->data()[sample];
                }
            }
        }
    }

    // Loose bounds expected for the trained parameters:
    // * wi < 0.01
    // * wj > 1.0
    // * b < 0
    // TODO: Add EXPECT_DOUBLE_EQ instead
    EXPECT_LT(wIn->data()[0], 0.01);
    EXPECT_GT(wIn->data()[1], 1.0);
    EXPECT_LT(bIn->data()[0], 0.0);

    SPDLOG_ERROR("Result wIn: {}, bIn: {}",
      wIn->data(), bIn->data());
}
TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) {
uint32_t ITERATIONS = 100;
@ -76,6 +156,6 @@ TEST(LogisticRegressionAlgorithm, TestMainLogisticRegression) {
EXPECT_GT(wIn->data()[1], 1.0);
EXPECT_LT(bIn->data()[0], 0.0);
//SPDLOG_DEBUG("Result wIn: {}, bIn: {}",
// wIn->data(), bIn->data());
SPDLOG_ERROR("Result wIn: {}, bIn: {}",
wIn->data(), bIn->data());
}

View file