Updated to enable opmult to work
This commit is contained in:
parent
fb617d1722
commit
3304767f2c
11 changed files with 49 additions and 39 deletions
|
|
@ -80,8 +80,11 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> inputsAsyncB;
|
||||
|
||||
std::vector<std::shared_ptr<kp::Algorithm>> algosAsync;
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
inputsAsyncB.push_back(mgr.tensor(data));
|
||||
algosAsync.push_back(mgr.algorithm({inputsAsyncB[i]}, spirv));
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<kp::Sequence>> sqs;
|
||||
|
|
@ -93,7 +96,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
|
|||
auto startAsync = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algorithms[i]);
|
||||
sqs[i]->evalAsync<kp::OpAlgoDispatch>(algosAsync[i]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
|
|
@ -157,8 +160,8 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution)
|
|||
|
||||
sq1->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB });
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA});
|
||||
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB});
|
||||
std::shared_ptr<kp::Algorithm> algo1 = mgr.algorithm({tensorA}, spirv);
|
||||
std::shared_ptr<kp::Algorithm> algo2 = mgr.algorithm({tensorB}, spirv);
|
||||
|
||||
sq1->evalAsync<kp::OpAlgoDispatch>(algo1);
|
||||
sq2->evalAsync<kp::OpAlgoDispatch>(algo2);
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ TEST(TestDestroy, TestDestroyTensorVector)
|
|||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(algo->getTensors())
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncDevice>(algo->getTensors())
|
||||
->record<kp::OpTensorSyncLocal>(algo->getTensors())
|
||||
->eval();
|
||||
|
||||
tensorA->destroy();
|
||||
|
|
@ -111,12 +111,14 @@ TEST(TestDestroy, TestDestroySequenceSingle)
|
|||
|
||||
tensorA = mgr.tensor({0, 0, 0});
|
||||
|
||||
mgr.sequence()
|
||||
sq = mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({tensorA})
|
||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorA}, spirv))
|
||||
->record<kp::OpTensorSyncLocal>({tensorA})
|
||||
->eval();
|
||||
|
||||
sq->destroy();
|
||||
|
||||
EXPECT_FALSE(sq->isInit());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,13 +32,15 @@ TEST(TestLogisticRegression, TestMainLogisticRegression)
|
|||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
(uint32_t*)kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv +
|
||||
kp::shader_data::test_shaders_glsl_test_logistic_regression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm =
|
||||
mgr.algorithm(params, spirv, kp::Workgroup(), kp::Constants({5.0}));
|
||||
mgr.algorithm(params, spirv, kp::Workgroup({5}), kp::Constants({5.0}));
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
|
|
@ -104,6 +106,8 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy)
|
|||
wIn, wOutI, wOutJ,
|
||||
bIn, bOut, lOut };
|
||||
|
||||
mgr.sequence()->record<kp::OpTensorSyncDevice>(params)->eval();
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue