Updated OpAlgoBase to no longer copy output data from the device, now that OpTensorSync operations have been introduced
This commit is contained in:
parent
4171786b6f
commit
9f8508075a
10 changed files with 92 additions and 180 deletions
|
|
@ -48,7 +48,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<>>(
|
||||
params,
|
||||
false, // Whether to copy output from device
|
||||
"test/shaders/glsl/test_logistic_regression.comp");
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({wOutI, wOutJ, bOut});
|
||||
|
|
@ -125,9 +124,10 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<>>(
|
||||
params,
|
||||
true, // Whether to copy output from device
|
||||
"test/shaders/glsl/test_logistic_regression.comp");
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({wOutI, wOutJ, bOut});
|
||||
|
||||
sq->end();
|
||||
|
||||
// Iterate across all expected iterations
|
||||
|
|
|
|||
|
|
@ -10,16 +10,17 @@ TEST(TestManager, EndToEndOpMultFlow)
|
|||
std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor({ 0, 1, 2 }) };
|
||||
mgr.evalOp<kp::OpTensorCreate>({ tensorLHS });
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor(
|
||||
{ 2, 4, 6 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor( { 2, 4, 6 }) };
|
||||
mgr.evalOp<kp::OpTensorCreate>({ tensorRHS });
|
||||
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor(
|
||||
{ 0, 0, 0 }) };
|
||||
std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor( { 0, 0, 0 }) };
|
||||
|
||||
mgr.evalOp<kp::OpTensorCreate>({ tensorOutput });
|
||||
|
||||
mgr.evalOp<kp::OpMult<>>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
mgr.evalOp<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
||||
EXPECT_EQ(tensorOutput->data(), std::vector<float>({0, 4, 12}));
|
||||
}
|
||||
|
||||
|
|
@ -46,6 +47,8 @@ TEST(TestManager, OpMultSequenceFlow) {
|
|||
|
||||
sq->record<kp::OpMult<>>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
|
@ -100,6 +103,8 @@ TEST(TestManager, TestMultipleTensorsAtOnce) {
|
|||
|
||||
sq->record<kp::OpMult<>>({ tensorLHS, tensorRHS, tensorOutput });
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorOutput });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,17 +27,16 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
false, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
false, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>({ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
|
@ -70,7 +69,6 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
false, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
|
|
@ -80,7 +78,6 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
false, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
|
|
@ -90,11 +87,18 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>(
|
||||
{ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
sqWeakPtr.reset();
|
||||
|
||||
|
|
@ -126,7 +130,6 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
|
|
@ -134,12 +137,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) {
|
|||
}
|
||||
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr2 = mgr.getOrCreateManagedSequence("newSequence2");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr2.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
|
|
@ -148,18 +150,28 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) {
|
|||
|
||||
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr3 = mgr.getOrCreateManagedSequence("newSequence3");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr3.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr4 = mgr.getOrCreateManagedSequence("newSequence5");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr4.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>(
|
||||
{ tensorA });
|
||||
|
||||
sq->end();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({3, 3, 3}));
|
||||
}
|
||||
|
||||
|
|
@ -190,12 +202,11 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) {
|
|||
}
|
||||
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr2 = mgr.getOrCreateManagedSequence("newSequence2");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr.lock()) {
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr2.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpAlgoBase<3, 1, 1>>(
|
||||
{ tensorA },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->end();
|
||||
|
|
@ -205,6 +216,20 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) {
|
|||
sq->eval();
|
||||
}
|
||||
|
||||
std::weak_ptr<kp::Sequence> sqWeakPtr3 = mgr.getOrCreateManagedSequence("newSequence3");
|
||||
if (std::shared_ptr<kp::Sequence> sq = sqWeakPtr2.lock()) {
|
||||
sq->begin();
|
||||
|
||||
sq->record<kp::OpTensorSyncLocal>(
|
||||
{ tensorA });
|
||||
|
||||
sq->end();
|
||||
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
sq->eval();
|
||||
}
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({3, 3, 3}));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) {
|
|||
|
||||
sq->record<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
sq->record<kp::OpTensorCopy>({tensorB, tensorA});
|
||||
|
|
|
|||
|
|
@ -29,9 +29,10 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) {
|
|||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(shader.begin(), shader.end()));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({0, 1, 2}));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({3, 4, 5}));
|
||||
}
|
||||
|
|
@ -45,12 +46,13 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) {
|
|||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB },
|
||||
true, // Whether to copy output from device
|
||||
std::vector<char>(
|
||||
kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv,
|
||||
kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv +
|
||||
kp::shader_data::test_shaders_glsl_test_op_custom_shader_comp_spv_len));
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({0, 1, 2}));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({3, 4, 5}));
|
||||
}
|
||||
|
|
@ -64,9 +66,10 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) {
|
|||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB },
|
||||
true, // Whether to copy output from device
|
||||
"test/shaders/glsl/test_op_custom_shader.comp");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({0, 1, 2}));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({3, 4, 5}));
|
||||
}
|
||||
|
|
@ -80,9 +83,10 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) {
|
|||
|
||||
mgr.evalOpDefault<kp::OpAlgoBase<>>(
|
||||
{ tensorA, tensorB },
|
||||
true, // Whether to copy output from device
|
||||
"test/shaders/glsl/test_op_custom_shader.comp.spv");
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>({tensorA, tensorB});
|
||||
|
||||
EXPECT_EQ(tensorA->data(), std::vector<float>({0, 1, 2}));
|
||||
EXPECT_EQ(tensorB->data(), std::vector<float>({3, 4, 5}));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue