Added new OpLhsRhsOut class to simplify common equations with two inputs one output
This commit is contained in:
parent
23cf43e231
commit
95061c8009
4 changed files with 492 additions and 311 deletions
|
|
@ -36,7 +36,7 @@ namespace kp {
|
|||
namespace shader_data {
|
||||
static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x08, 0x00,
|
||||
0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
@ -56,19 +56,19 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x52, 0x48, 0x53, 0x00, 0x05, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
|
||||
0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x06, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x05, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73,
|
||||
0x6f, 0x72, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x05, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73,
|
||||
0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x52, 0x68,
|
||||
0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x68,
|
||||
0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, 0x05, 0x00, 0x04, 0x00,
|
||||
0x29, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x52, 0x48, 0x53, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x4f, 0x55, 0x54, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
|
|
@ -78,24 +78,24 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x1d, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
@ -114,25 +114,25 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1d, 0x00, 0x03, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x03, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x1d, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
|
|
@ -141,12 +141,23 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00
|
||||
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
|
||||
0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x27, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||
0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00
|
||||
};
|
||||
static unsigned const int shaders_glsl_opmult_comp_spv_len = 1320;
|
||||
static unsigned const int shaders_glsl_opmult_comp_spv_len = 1452;
|
||||
}
|
||||
}
|
||||
#endif // define SHADEROP_SHADEROPMULT_HPP
|
||||
|
|
@ -974,6 +985,8 @@ OpAlgoBase<tX, tY, tZ>::postSubmit()
|
|||
|
||||
#endif
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
|
|
@ -983,13 +996,13 @@ namespace kp {
|
|||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpAlgoBase<tX, tY, tZ>
|
||||
class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult();
|
||||
OpAlgoLhsRhsOut();
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -1002,7 +1015,7 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
|
@ -1011,7 +1024,7 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult();
|
||||
~OpAlgoLhsRhsOut();
|
||||
|
||||
/**
|
||||
* The init function is responsible for ensuring that all of the tensors
|
||||
|
|
@ -1038,57 +1051,57 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorLHS;
|
||||
std::shared_ptr<Tensor> mTensorRHS;
|
||||
std::shared_ptr<Tensor> mTensorOutput;
|
||||
std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader
|
||||
std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging;
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implementation for template class
|
||||
#ifndef OPMULT_CPP
|
||||
#define OPMULT_CPP
|
||||
#ifndef OPALGOLHSRHSOUT_CPP
|
||||
#define OPALGOLHSRHSOUT_CPP
|
||||
|
||||
namespace kp {
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult()
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor base");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::~OpMult()
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::~OpAlgoLhsRhsOut()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult destructor started");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::init()
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult init called");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult called with less than 1 tensor");
|
||||
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 this->mTensors");
|
||||
spdlog::warn("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
|
|
@ -1099,7 +1112,7 @@ OpMult<tX, tY, tZ>::init()
|
|||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult all tensor parameters must be initialised. LHS: " +
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
|
|
@ -1110,7 +1123,7 @@ OpMult<tX, tY, tZ>::init()
|
|||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult all tensor parameters must be the same size LHS: " +
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
|
|
@ -1122,26 +1135,20 @@ OpMult<tX, tY, tZ>::init()
|
|||
this->mTensorOutputStaging->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
#if RELEASE
|
||||
std::vector<char> shaderFileData(
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
#else
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
|
||||
#endif
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::record()
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult record called");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
|
|
@ -1169,9 +1176,9 @@ OpMult<tX, tY, tZ>::record()
|
|||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::postSubmit()
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult postSubmit called");
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");
|
||||
|
||||
this->mTensorOutputStaging->mapDataFromHostMemory();
|
||||
|
||||
|
|
@ -1180,7 +1187,86 @@ OpMult<tX, tY, tZ>::postSubmit()
|
|||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPMULT_CPP
|
||||
#endif // #ifndef OPALGOLHSRHSOUT_CPP
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that performs multiplication on two tensors and outpus on third
|
||||
* tensor. The template parameters specify the processing GPU layout number of
|
||||
* iterations for each x, y, z parameter. More specifically, this will be the
|
||||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpAlgoLhsRhsOut<tX, tY, tZ>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoLhsRhsOut<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
#ifndef RELEASE
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
#endif
|
||||
}
|
||||
|
||||
#if RELEASE
|
||||
/**
|
||||
* If release it will be using the static version of the shader which is
|
||||
* loaded using this file directly.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device used to find device queues
|
||||
* @param device Vulkan logical device for passing to Algorithm
|
||||
* @param commandBuffer Vulkan Command Buffer to record commands into
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param freeTensors Whether operation manages the memory of the Tensors
|
||||
*/
|
||||
std::vector<char> fetchSpirvBinaryData() override
|
||||
{
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpMult Running shaders directly from header");
|
||||
|
||||
return std::vector<char>(
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult() {
|
||||
SPDLOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
|
|||
214
src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
Normal file
214
src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
 * Base operation for shaders that take two input tensors (left hand side and
 * right hand side) and write to a third output tensor. The template
 * parameters specify the processing GPU layout number of iterations for each
 * x, y, z parameter. More specifically, this will be the input to
 * ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)"
 */
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
class OpAlgoLhsRhsOut : public OpAlgoBase<tX, tY, tZ>
{
  public:
    /**
     * Base constructor, should not be used unless explicitly intended.
     */
    OpAlgoLhsRhsOut();

    /**
     * Default constructor with parameters that provides the bare minimum
     * requirements for the operations to be able to create and manage their
     * sub-components.
     *
     * @param physicalDevice Vulkan physical device used to find device queues
     * @param device Vulkan logical device for passing to Algorithm
     * @param commandBuffer Vulkan Command Buffer to record commands into
     * @param tensors Tensors that are to be used in this operation; init()
     * reads them in the order LHS, RHS, output
     */
    OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                    std::shared_ptr<vk::Device> device,
                    std::shared_ptr<vk::CommandBuffer> commandBuffer,
                    std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
     * Default destructor, which is in charge of destroying the algorithm
     * components but does not destroy the underlying tensors
     */
    ~OpAlgoLhsRhsOut();

    /**
     * The init function is responsible for ensuring that all of the tensors
     * provided are aligned with requirements such as LHS, RHS and Output
     * tensors, and creates the algorithm component which processes the
     * computation. It throws std::runtime_error when fewer than three tensors
     * were provided, when any of the three is not initialised, or when their
     * sizes differ.
     */
    void init() override;

    /**
     * This records the commands that are to be sent to the GPU. This includes
     * the barriers that ensure the memory has been copied before going in and
     * out of the shader, as well as the dispatch operation that sends the
     * shader processing to the gpu. This function also records the GPU memory
     * copy of the output data for the staging buffer so it can be read by the
     * host.
     */
    void record() override;

    /**
     * Executes after the recorded commands are submitted: maps the data of
     * the staging tensor and stores it in the output tensor so the output
     * data can be retrieved.
     */
    void postSubmit() override;

  protected:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<Tensor> mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader (first tensor provided)
    std::shared_ptr<Tensor> mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader (second tensor provided)
    std::shared_ptr<Tensor> mTensorOutput; ///< Reference to the parameter used in the output of the shader (third tensor provided); its contents are copied through the staging tensor

    // -------------- ALWAYS OWNED RESOURCES
    std::shared_ptr<Tensor> mTensorOutputStaging; ///< Temporary staging tensor, created in init(), used to copy the data of the output tensor
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implementation for template class
|
||||
#ifndef OPALGOLHSRHSOUT_CPP
|
||||
#define OPALGOLHSRHSOUT_CPP
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
 * Base constructor: takes no arguments and only emits a debug log entry.
 * None of the tensor members are assigned here.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base");
}
|
||||
|
||||
/**
 * Constructor with parameters: forwards every argument unchanged to the
 * OpAlgoBase constructor and emits a debug log entry. The LHS / RHS / output
 * members are not assigned here; that happens in init().
 *
 * @param physicalDevice Vulkan physical device, forwarded to OpAlgoBase
 * @param device Vulkan logical device, forwarded to OpAlgoBase
 * @param commandBuffer Vulkan command buffer, forwarded to OpAlgoBase
 * @param tensors Tensors used by this operation, forwarded to OpAlgoBase
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::OpAlgoLhsRhsOut(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                           std::shared_ptr<vk::Device> device,
                           std::shared_ptr<vk::CommandBuffer> commandBuffer,
                           std::vector<std::shared_ptr<Tensor>>& tensors)
  : OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params");
}
|
||||
|
||||
/**
 * Destructor: only emits a debug log entry. The tensor members are
 * shared_ptr instances, so their references are dropped by the implicit
 * member destruction that follows this body.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
OpAlgoLhsRhsOut<tX, tY, tZ>::~OpAlgoLhsRhsOut()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started");
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called");
|
||||
|
||||
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut called with less than 1 tensor");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
|
||||
// TODO: Explore adding a validate function
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
}
|
||||
|
||||
// TODO: Explore use-cases where tensors shouldn't be the same size, and how
|
||||
// to deal with those situations
|
||||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
this->mTensorOutputStaging = std::make_shared<Tensor>(
|
||||
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mTensorOutputStaging->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data");
|
||||
|
||||
std::vector<char>& shaderFileData = this->fetchSpirvBinaryData();
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpAlgoLhsRhsOut<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
|
||||
}
|
||||
|
||||
/**
 * Runs after the recorded commands have been submitted: refreshes the staging
 * tensor's data and then stores that data in the output tensor.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
void
OpAlgoLhsRhsOut<tX, tY, tZ>::postSubmit()
{
    SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called");

    // NOTE(review): presumably this reads the staging buffer memory back into
    // the staging tensor's host-side data - confirm against Tensor.
    this->mTensorOutputStaging->mapDataFromHostMemory();

    // Must come after the mapping above so the output tensor receives the
    // refreshed staging data.
    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPALGOLHSRHSOUT_CPP
|
||||
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoBase.hpp"
|
||||
#include "kompute/operations/OpAlgoLhsRhsOut.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -22,13 +22,15 @@ namespace kp {
|
|||
* input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t tZ)"
|
||||
*/
|
||||
template<uint32_t tX = 0, uint32_t tY = 0, uint32_t tZ = 0>
|
||||
class OpMult : public OpAlgoBase<tX, tY, tZ>
|
||||
class OpMult : public OpAlgoLhsRhsOut<tX, tY, tZ>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Base constructor, should not be used unless explicitly intended.
|
||||
*/
|
||||
OpMult();
|
||||
OpMult() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
|
|
@ -44,180 +46,48 @@ class OpMult : public OpAlgoBase<tX, tY, tZ>
|
|||
OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoLhsRhsOut<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
#ifndef RELEASE
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
#endif
|
||||
}
|
||||
|
||||
#if RELEASE
|
||||
/**
|
||||
* If release it will be using the static version of the shader which is
|
||||
* loaded using this file directly: the bytes of
|
||||
* shader_data::shaders_glsl_opmult_comp_spv are copied into a new vector.
|
||||
*
|
||||
* This override takes no parameters and is only compiled when RELEASE
|
||||
* evaluates to non-zero.
|
||||
*
|
||||
* NOTE(review): the constructor guards the file path with "#ifndef RELEASE"
|
||||
* while this guard is "#if RELEASE" - confirm RELEASE is never defined as 0.
|
||||
*
|
||||
* @return The embedded opmult SPIR-V binary as a vector of chars
|
||||
*/
|
||||
std::vector<char> fetchSpirvBinaryData() override
|
||||
{
|
||||
SPDLOG_WARN(
|
||||
"Kompute OpMult Running shaders directly from header");
|
||||
|
||||
return std::vector<char>(
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult();
|
||||
~OpMult() {
|
||||
SPDLOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
|
||||
/**
|
||||
* The init function is responsible for ensuring that all of the tensors
|
||||
* provided are aligned with requirements such as LHS, RHS and Output
|
||||
* tensors, and creates the algorithm component which processes the
|
||||
* computation.
|
||||
*/
|
||||
void init() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*/
|
||||
void record() override;
|
||||
|
||||
/**
|
||||
* Executes after the recorded commands are submitted, and performs a copy
|
||||
* of the GPU Device memory into the staging buffer so the output data can
|
||||
* be retrieved.
|
||||
*/
|
||||
void postSubmit() override;
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorLHS;
|
||||
std::shared_ptr<Tensor> mTensorRHS;
|
||||
std::shared_ptr<Tensor> mTensorOutput;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Tensor> mTensorOutputStaging;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
// Including implementation for template class
|
||||
#ifndef OPMULT_CPP
|
||||
#define OPMULT_CPP
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
 * Base constructor: takes no arguments and only emits a debug log line.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor base");
|
||||
}
|
||||
|
||||
/**
 * Constructor with parameters. Forwards every argument unchanged to the
 * OpAlgoBase<tX, tY, tZ> constructor and emits a debug log line; no other
 * work is done here.
 *
 * @param physicalDevice Forwarded to OpAlgoBase
 * @param device Forwarded to OpAlgoBase
 * @param commandBuffer Forwarded to OpAlgoBase
 * @param tensors Forwarded to OpAlgoBase
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::CommandBuffer> commandBuffer,
|
||||
std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: OpAlgoBase<tX, tY, tZ>(physicalDevice, device, commandBuffer, tensors)
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult constructor with params");
|
||||
}
|
||||
|
||||
/**
 * Destructor: only emits a debug log line; it releases nothing explicitly.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
OpMult<tX, tY, tZ>::~OpMult()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
|
||||
/**
 * Validates the tensors, creates the output staging tensor and initialises
 * the algorithm component with the opmult shader.
 *
 * The first three entries of mTensors are used as LHS, RHS and Output (any
 * further entries only trigger a warning). All three must report isInit()
 * and have equal size(). A staging tensor is created from the output
 * tensor's data and initialised with the stored physical device, device and
 * command buffer. The shader bytes come from the embedded array when RELEASE
 * is set, otherwise from fetchSpirvBinaryData() after pointing
 * mOptSpirvBinPath at "shaders/glsl/opmult.comp.spv".
 *
 * @throws std::runtime_error if fewer than 3 tensors were provided, if any
 * of the three tensors is not initialised, or if their sizes differ
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::init()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult init called");
|
||||
|
||||
// Exactly three tensors are consumed below, so fewer than three is fatal
if (this->mTensors.size() < 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult called with less than 3 tensors");
|
||||
} else if (this->mTensors.size() > 3) {
|
||||
spdlog::warn("Kompute OpMult called with more than 3 tensors");
|
||||
}
|
||||
|
||||
this->mTensorLHS = this->mTensors[0];
|
||||
this->mTensorRHS = this->mTensors[1];
|
||||
this->mTensorOutput = this->mTensors[2];
|
||||
|
||||
|
||||
// TODO: Explore adding a validate function
|
||||
if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
|
||||
this->mTensorOutput->isInit())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult all tensor parameters must be initialised. LHS: " +
|
||||
std::to_string(this->mTensorLHS->isInit()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->isInit()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->isInit()));
|
||||
}
|
||||
|
||||
// TODO: Explore use-cases where tensors shouldn't be the same size, and how
|
||||
// to deal with those situations
|
||||
if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
|
||||
this->mTensorRHS->size() == this->mTensorOutput->size())) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult all tensor parameters must be the same size LHS: " +
|
||||
std::to_string(this->mTensorLHS->size()) +
|
||||
" RHS: " + std::to_string(this->mTensorRHS->size()) +
|
||||
" Output: " + std::to_string(this->mTensorOutput->size()));
|
||||
}
|
||||
|
||||
// Staging tensor is seeded with the output tensor's current data
this->mTensorOutputStaging = std::make_shared<Tensor>(
|
||||
this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
|
||||
|
||||
this->mTensorOutputStaging->init(
|
||||
this->mPhysicalDevice, this->mDevice, this->mCommandBuffer);
|
||||
|
||||
#if RELEASE
|
||||
std::vector<char> shaderFileData(
|
||||
shader_data::shaders_glsl_opmult_comp_spv,
|
||||
shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len);
|
||||
#else
|
||||
this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv";
|
||||
// Held by value: a non-const lvalue reference cannot bind to a returned temporary
std::vector<char> shaderFileData = this->fetchSpirvBinaryData();
|
||||
#endif
|
||||
|
||||
SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component");
|
||||
|
||||
this->mAlgorithm->init(shaderFileData, this->mTensors);
|
||||
}
|
||||
|
||||
/**
 * Records the commands for one multiplication run.
 *
 * In order, this records: a buffer memory barrier on the LHS tensor and on
 * the RHS tensor (host write -> shader read, host stage -> compute shader
 * stage), the algorithm dispatch using mX / mY / mZ, a buffer memory barrier
 * on the output tensor (shader write -> transfer read, compute shader stage
 * -> transfer stage), and finally a copy from the output tensor into the
 * output staging tensor.
 *
 * NOTE(review): the commands are presumably recorded into the command buffer
 * the tensors / algorithm were initialised with - confirm against Tensor and
 * Algorithm.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::record()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
// (applied to both input tensors before the dispatch is recorded)
|
||||
this->mTensorLHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
this->mTensorRHS->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eHostWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eHost,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
|
||||
// Record the shader dispatch with the stored mX / mY / mZ values
this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ);
|
||||
|
||||
// Barrier to ensure the shader code is executed before buffer read
|
||||
this->mTensorOutput->recordBufferMemoryBarrier(
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
// Copy the output tensor into the staging tensor (read back in postSubmit)
this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
|
||||
}
|
||||
|
||||
/**
 * Transfers the result from the output staging tensor to the output tensor.
 *
 * Calls mapDataFromHostMemory() on the staging tensor and then sets the
 * output tensor's data to the staging tensor's data().
 *
 * NOTE(review): mapDataFromHostMemory() presumably refreshes the staging
 * tensor's host-side data from its mapped buffer memory - confirm against
 * Tensor.
 */
template<uint32_t tX, uint32_t tY, uint32_t tZ>
|
||||
void
|
||||
OpMult<tX, tY, tZ>::postSubmit()
|
||||
{
|
||||
SPDLOG_DEBUG("Kompute OpMult postSubmit called");
|
||||
|
||||
this->mTensorOutputStaging->mapDataFromHostMemory();
|
||||
|
||||
// Hand the data read via the staging tensor to the user-facing output tensor
this->mTensorOutput->setData(this->mTensorOutputStaging->data());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef OPMULT_CPP
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ namespace kp {
|
|||
namespace shader_data {
|
||||
static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x08, 0x00,
|
||||
0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
@ -45,19 +45,19 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x52, 0x48, 0x53, 0x00, 0x05, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
|
||||
0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x06, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x05, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73,
|
||||
0x6f, 0x72, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x05, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73,
|
||||
0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
|
||||
0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x52, 0x68,
|
||||
0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x68,
|
||||
0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, 0x05, 0x00, 0x04, 0x00,
|
||||
0x29, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x52, 0x48, 0x53, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f,
|
||||
0x4f, 0x55, 0x54, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
|
|
@ -67,24 +67,24 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x1d, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
@ -103,25 +103,25 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1d, 0x00, 0x03, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x03, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x1d, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
|
|
@ -130,12 +130,23 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = {
|
|||
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00
|
||||
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
|
||||
0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x27, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||
0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00
|
||||
};
|
||||
static unsigned const int shaders_glsl_opmult_comp_spv_len = 1320;
|
||||
static unsigned const int shaders_glsl_opmult_comp_spv_len = 1452;
|
||||
}
|
||||
}
|
||||
#endif // define SHADEROP_SHADEROPMULT_HPP
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue