Updated to have shorter dispatch size and larger loop size
This commit is contained in:
parent
9e79b9f352
commit
c69fcb7e60
1 changed files with 26 additions and 10 deletions
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
TEST(TestAsyncOperations, TestManagerAsync)
|
||||
{
|
||||
uint32_t size = 100000;
|
||||
uint32_t size = 10;
|
||||
|
||||
uint32_t numParallel = 6;
|
||||
|
||||
|
|
@ -19,22 +19,38 @@ TEST(TestAsyncOperations, TestManagerAsync)
|
|||
layout(set = 0, binding = 0) buffer a { float pa[]; };
|
||||
layout(set = 0, binding = 1) buffer b { float pb[]; };
|
||||
|
||||
shared uint sharedTotal[1];
|
||||
|
||||
void main() {
|
||||
uint index = gl_GlobalInvocationID.x;
|
||||
|
||||
for (int i = 0; i < 10000; i++)
|
||||
sharedTotal[0] = 0;
|
||||
|
||||
barrier();
|
||||
memoryBarrierShared();
|
||||
|
||||
for (int i = 0; i < 100000000; i++)
|
||||
{
|
||||
pa[index] += 1.0;
|
||||
atomicAdd(sharedTotal[0], 1);
|
||||
atomicAdd(sharedTotal[0], -1);
|
||||
atomicAdd(sharedTotal[0], 1);
|
||||
atomicAdd(sharedTotal[0], -1);
|
||||
atomicAdd(sharedTotal[0], 1);
|
||||
atomicAdd(sharedTotal[0], -1);
|
||||
atomicAdd(sharedTotal[0], 1);
|
||||
}
|
||||
|
||||
pb[index] = pa[index];
|
||||
barrier();
|
||||
memoryBarrierShared();
|
||||
|
||||
pb[index] = sharedTotal[0];
|
||||
pa[index] = 0;
|
||||
}
|
||||
)");
|
||||
|
||||
std::vector<float> data(size, 0.0);
|
||||
std::vector<float> resultSync(size, 10000);
|
||||
std::vector<float> resultAsync(size, 10000);
|
||||
std::vector<float> resultSync(size, 100000000);
|
||||
std::vector<float> resultAsync(size, 100000000);
|
||||
|
||||
kp::Manager mgr;
|
||||
|
||||
|
|
@ -58,11 +74,11 @@ TEST(TestAsyncOperations, TestManagerAsync)
|
|||
|
||||
}
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
|
||||
auto endSync = std::chrono::high_resolution_clock::now();
|
||||
auto durationSync = std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync).count();
|
||||
|
||||
mgr.evalOpDefault<kp::OpTensorSyncLocal>(inputsSyncB);
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsSyncB[i]->data(), resultSync);
|
||||
}
|
||||
|
|
@ -97,11 +113,11 @@ TEST(TestAsyncOperations, TestManagerAsync)
|
|||
mgrAsync.evalOpAwait("async" + std::to_string(i));
|
||||
}
|
||||
|
||||
mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
|
||||
auto endAsync = std::chrono::high_resolution_clock::now();
|
||||
auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();
|
||||
|
||||
mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ inputsAsyncB });
|
||||
|
||||
for (uint32_t i = 0; i < numParallel; i++) {
|
||||
EXPECT_EQ(inputsAsyncB[i]->data(), resultAsync);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue