llama-cpp-turboquant/shaders/glsl/computeheadless.comp
2020-08-04 21:12:09 +01:00

39 lines
708 B
Text

#version 450

// Work-group shape: four invocations along X, one along Y and Z.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;

// Specialization constant 0: the pipeline overrides this at creation time;
// 32 is only the fallback when no specialization value is supplied.
layout(constant_id = 0) const uint BUF_SIZE = 32;

// Storage buffer at binding 0 holding the unsigned integers that main()
// reads and overwrites in place.
layout(binding = 0) buffer Pos
{
    uint values[];
};

// Single accumulator slot shared by the invocations of one work group.
shared uint sharedTotal[1];
// Returns the remainder of x divided by N for unsigned operands.
//
// The result of `%` on unsigned integers is already in [0, N), so no
// "add N and reduce again" step is needed; that step also overflowed
// 32 bits (and produced a wrong remainder) whenever N exceeded 2^31.
//
// N must be non-zero: the result of `%` with a zero divisor is undefined.
uint mod(uint x, uint N) {
    return x % N;
}
// For every in-range element i, replaces values[i] with
//   (sum of the elements handled by this invocation's work group)
//   + values[(i + 1) mod BUF_SIZE] + values[(i - 1) mod BUF_SIZE]
// where the neighbour indices wrap around the ends of the buffer.
//
// Invocations whose global X index is >= BUF_SIZE contribute nothing and
// write nothing, but they still execute every barrier() so that the
// barriers stay in uniform control flow.
void main()
{
    uint index = gl_GlobalInvocationID.x;
    bool inRange = index < BUF_SIZE;

    // One invocation clears the accumulator; everyone waits until it is visible.
    if (gl_LocalInvocationIndex == 0)
        sharedTotal[0] = 0;
    memoryBarrierShared();
    barrier();

    uint adjacentSum = 0;
    if (inRange) {
        atomicAdd(sharedTotal[0], values[index]);

        // Wrap explicitly: in unsigned arithmetic `index - 1` at index 0
        // becomes 4294967295, which only reduces to BUF_SIZE - 1 when
        // BUF_SIZE is a power of two.
        uint next = mod(index + 1, BUF_SIZE);
        uint prev = (index == 0) ? (BUF_SIZE - 1) : (index - 1);

        // Read the neighbours before the barrier below, so that invocations
        // of this work group cannot observe each other's updated values.
        // NOTE(review): neighbours owned by a different work group can still
        // be overwritten concurrently; a work-group barrier cannot order
        // that. Confirm whether the dispatch relies on cross-group results.
        adjacentSum = values[next] + values[prev];
    }

    // All atomic adds and neighbour reads of this work group finish before
    // any invocation of the group overwrites its element.
    memoryBarrierShared();
    memoryBarrierBuffer();
    barrier();

    if (inRange)
        values[index] = sharedTotal[0] + adjacentSum;
}