diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index a4a4cbb35..67ab95763 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -36,7 +36,7 @@ namespace kp { namespace shader_data { static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x08, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -56,19 +56,19 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, - 0x52, 0x48, 0x53, 0x00, 0x05, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, - 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, - 0x6f, 0x72, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x73, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, + 0x05, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x52, 0x68, + 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x68, + 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x52, 0x48, 0x53, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, @@ -78,24 +78,24 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -114,25 +114,25 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x03, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x03, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, @@ -141,12 +141,23 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 + 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }; -static unsigned const int shaders_glsl_opmult_comp_spv_len = 1320; +static unsigned const int shaders_glsl_opmult_comp_spv_len = 1452; } } #endif // define SHADEROP_SHADEROPMULT_HPP @@ -974,6 +985,8 @@ OpAlgoBase::postSubmit() #endif +#include + namespace kp { /** @@ -983,13 +996,13 @@ namespace kp { * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ template -class OpMult : public OpAlgoBase +class OpAlgoLhsRhsOut : public OpAlgoBase { public: /** * Base constructor, should not be used unless explicitly intended. */ - OpMult(); + OpAlgoLhsRhsOut(); /** * Default constructor with parameters that provides the bare minimum @@ -1002,7 +1015,7 @@ class OpMult : public OpAlgoBase * @param tensors Tensors that are to be used in this operation * @param freeTensors Whether operation manages the memory of the Tensors */ - OpMult(std::shared_ptr physicalDevice, + OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors); @@ -1011,7 +1024,7 @@ class OpMult : public OpAlgoBase * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - ~OpMult(); + ~OpAlgoLhsRhsOut(); /** * The init function is responsible for ensuring that all of the tensors @@ -1038,57 +1051,57 @@ class OpMult : public OpAlgoBase */ void postSubmit() override; - private: + protected: // -------------- NEVER OWNED RESOURCES - std::shared_ptr mTensorLHS; - std::shared_ptr mTensorRHS; - std::shared_ptr mTensorOutput; + std::shared_ptr mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader + std::shared_ptr mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader + std::shared_ptr mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector // -------------- ALWAYS OWNED RESOURCES - std::shared_ptr mTensorOutputStaging; + std::shared_ptr mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor }; } // End namespace kp // Including implemenation for template class -#ifndef OPMULT_CPP -#define OPMULT_CPP +#ifndef OPALGOLHSRHSOUT_CPP +#define OPALGOLHSRHSOUT_CPP namespace kp { template -OpMult::OpMult() +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() { - SPDLOG_DEBUG("Kompute OpMult constructor base"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); } template -OpMult::OpMult(std::shared_ptr physicalDevice, +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector>& tensors) : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) { - SPDLOG_DEBUG("Kompute OpMult constructor with params"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); } template -OpMult::~OpMult() +OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() { - SPDLOG_DEBUG("Kompute OpMult destructor started"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); } template void -OpMult::init() +OpAlgoLhsRhsOut::init() { - SPDLOG_DEBUG("Kompute OpMult init called"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); if (this->mTensors.size() < 3) { throw std::runtime_error( - "Kompute OpMult called with less than 1 tensor"); + "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); } else if (this->mTensors.size() > 3) { - spdlog::warn("Kompute OpMult called with more than 3 this->mTensors"); + spdlog::warn("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); } this->mTensorLHS = this->mTensors[0]; @@ -1099,7 +1112,7 @@ OpMult::init() if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && this->mTensorOutput->isInit())) { throw std::runtime_error( - "Kompute OpMult all tensor parameters must be initialised. LHS: " + + "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + std::to_string(this->mTensorLHS->isInit()) + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + " Output: " + std::to_string(this->mTensorOutput->isInit())); @@ -1110,7 +1123,7 @@ OpMult::init() if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && this->mTensorRHS->size() == this->mTensorOutput->size())) { throw std::runtime_error( - "Kompute OpMult all tensor parameters must be the same size LHS: " + + "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + std::to_string(this->mTensorLHS->size()) + " RHS: " + std::to_string(this->mTensorRHS->size()) + " Output: " + std::to_string(this->mTensorOutput->size())); @@ -1122,26 +1135,20 @@ OpMult::init() this->mTensorOutputStaging->init( this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); -#if RELEASE - std::vector shaderFileData( - shader_data::shaders_glsl_opmult_comp_spv, - shader_data::shaders_glsl_opmult_comp_spv + - kp::shader_data::shaders_glsl_opmult_comp_spv_len); -#else - this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv"; - std::vector& shaderFileData = this->fetchSpirvBinaryData(); -#endif + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); - SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component"); + std::vector& shaderFileData = this->fetchSpirvBinaryData(); + + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); this->mAlgorithm->init(shaderFileData, this->mTensors); } template void -OpMult::record() +OpAlgoLhsRhsOut::record() { - SPDLOG_DEBUG("Kompute OpMult record called"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); // Barrier to ensure the data is finished writing to buffer memory this->mTensorLHS->recordBufferMemoryBarrier( @@ -1169,9 +1176,9 @@ OpMult::record() template void -OpMult::postSubmit() +OpAlgoLhsRhsOut::postSubmit() { - SPDLOG_DEBUG("Kompute OpMult postSubmit called"); + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); this->mTensorOutputStaging->mapDataFromHostMemory(); @@ -1180,7 +1187,86 @@ OpMult::postSubmit() } -#endif // #ifndef OPMULT_CPP +#endif // #ifndef OPALGOLHSRHSOUT_CPP + +namespace kp { + +/** + * Operation that performs multiplication on two tensors and outpus on third + * tensor. The template parameters specify the processing GPU layout number of + * iterations for each x, y, z parameter. More specifically, this will be the + * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + */ +template +class OpMult : public OpAlgoLhsRhsOut +{ + public: + /** + * Base constructor, should not be used unless explicitly intended. + */ + OpMult() { + + } + + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + * @param freeTensors Whether operation manages the memory of the Tensors + */ + OpMult(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors) + : OpAlgoLhsRhsOut(physicalDevice, device, commandBuffer, tensors) + { + SPDLOG_DEBUG("Kompute OpMult constructor with params"); + +#ifndef RELEASE + this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv"; +#endif + } + +#if RELEASE + /** + * If release it will be using the static version of the shader which is + * loaded using this file directly. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + * @param freeTensors Whether operation manages the memory of the Tensors + */ + std::vector fetchSpirvBinaryData() override + { + SPDLOG_WARN( + "Kompute OpMult Running shaders directly from header"); + + return std::vector( + shader_data::shaders_glsl_opmult_comp_spv, + shader_data::shaders_glsl_opmult_comp_spv + + kp::shader_data::shaders_glsl_opmult_comp_spv_len); + + } +#endif + + /** + * Default destructor, which is in charge of destroying the algorithm + * components but does not destroy the underlying tensors + */ + ~OpMult() { + SPDLOG_DEBUG("Kompute OpMult destructor started"); + } + +}; + +} // End namespace kp namespace kp { diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp new file mode 100644 index 000000000..bd21aa351 --- /dev/null +++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp @@ -0,0 +1,214 @@ +#pragma once + +#include + +#include "kompute/Core.hpp" + +#include "kompute/Algorithm.hpp" +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpAlgoBase.hpp" + +namespace kp { + +/** + * Operation that performs multiplication on two tensors and outpus on third + * tensor. The template parameters specify the processing GPU layout number of + * iterations for each x, y, z parameter. More specifically, this will be the + * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" + */ +template +class OpAlgoLhsRhsOut : public OpAlgoBase +{ + public: + /** + * Base constructor, should not be used unless explicitly intended. + */ + OpAlgoLhsRhsOut(); + + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + * @param freeTensors Whether operation manages the memory of the Tensors + */ + OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors); + + /** + * Default destructor, which is in charge of destroying the algorithm + * components but does not destroy the underlying tensors + */ + ~OpAlgoLhsRhsOut(); + + /** + * The init function is responsible for ensuring that all of the tensors + * provided are aligned with requirements such as LHS, RHS and Output + * tensors, and creates the algorithm component which processes the + * computation. + */ + void init() override; + + /** + * This records the commands that are to be sent to the GPU. This includes + * the barriers that ensure the memory has been copied before going in and + * out of the shader, as well as the dispatch operation that sends the + * shader processing to the gpu. This function also records the GPU memory + * copy of the output data for the staging bufffer so it can be read by the + * host. + */ + void record() override; + + /** + * Executes after the recorded commands are submitted, and performs a copy + * of the GPU Device memory into the staging buffer so the output data can + * be retrieved. + */ + void postSubmit() override; + + protected: + // -------------- NEVER OWNED RESOURCES + std::shared_ptr mTensorLHS; ///< Reference to the parameter used in the left hand side equation of the shader + std::shared_ptr mTensorRHS; ///< Reference to the parameter used in the right hand side equation of the shader + std::shared_ptr mTensorOutput; ///< Reference to the parameter used in the output of the shader and will be copied with a staging vector + + // -------------- ALWAYS OWNED RESOURCES + std::shared_ptr mTensorOutputStaging; ///< Staging temporary tensor user do to copy the output of the tensor +}; + +} // End namespace kp + +// Including implemenation for template class +#ifndef OPALGOLHSRHSOUT_CPP +#define OPALGOLHSRHSOUT_CPP + +namespace kp { + +template +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor base"); +} + +template +OpAlgoLhsRhsOut::OpAlgoLhsRhsOut(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector>& tensors) + : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut constructor with params"); +} + +template +OpAlgoLhsRhsOut::~OpAlgoLhsRhsOut() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut destructor started"); +} + +template +void +OpAlgoLhsRhsOut::init() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut init called"); + + if (this->mTensors.size() < 3) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut called with less than 1 tensor"); + } else if (this->mTensors.size() > 3) { + spdlog::warn("Kompute OpAlgoLhsRhsOut called with more than 3 this->mTensors"); + } + + this->mTensorLHS = this->mTensors[0]; + this->mTensorRHS = this->mTensors[1]; + this->mTensorOutput = this->mTensors[2]; + + + // TODO: Explore adding a validate function + if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && + this->mTensorOutput->isInit())) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut all tensor parameters must be initialised. LHS: " + + std::to_string(this->mTensorLHS->isInit()) + + " RHS: " + std::to_string(this->mTensorRHS->isInit()) + + " Output: " + std::to_string(this->mTensorOutput->isInit())); + } + + // TODO: Explore use-cases where tensors shouldn't be the same size, and how + // to deal with those situations + if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && + this->mTensorRHS->size() == this->mTensorOutput->size())) { + throw std::runtime_error( + "Kompute OpAlgoLhsRhsOut all tensor parameters must be the same size LHS: " + + std::to_string(this->mTensorLHS->size()) + + " RHS: " + std::to_string(this->mTensorRHS->size()) + + " Output: " + std::to_string(this->mTensorOutput->size())); + } + + this->mTensorOutputStaging = std::make_shared( + this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); + + this->mTensorOutputStaging->init( + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut fetching spirv data"); + + std::vector& shaderFileData = this->fetchSpirvBinaryData(); + + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut Initialising algorithm component"); + + this->mAlgorithm->init(shaderFileData, this->mTensors); +} + +template +void +OpAlgoLhsRhsOut::record() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut record called"); + + // Barrier to ensure the data is finished writing to buffer memory + this->mTensorLHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + this->mTensorRHS->recordBufferMemoryBarrier( + vk::AccessFlagBits::eHostWrite, + vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eHost, + vk::PipelineStageFlagBits::eComputeShader); + + this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); + + // Barrier to ensure the shader code is executed before buffer read + this->mTensorOutput->recordBufferMemoryBarrier( + vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer); + + this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput); +} + +template +void +OpAlgoLhsRhsOut::postSubmit() +{ + SPDLOG_DEBUG("Kompute OpAlgoLhsRhsOut postSubmit called"); + + this->mTensorOutputStaging->mapDataFromHostMemory(); + + this->mTensorOutput->setData(this->mTensorOutputStaging->data()); +} + +} + +#endif // #ifndef OPALGOLHSRHSOUT_CPP + diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index 32128643d..3c56d6143 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -11,7 +11,7 @@ #include "kompute/Algorithm.hpp" #include "kompute/Tensor.hpp" -#include "kompute/operations/OpAlgoBase.hpp" +#include "kompute/operations/OpAlgoLhsRhsOut.hpp" namespace kp { @@ -22,13 +22,15 @@ namespace kp { * input to ".dispatch(uint32_t tX, uint32_t tY, uint32_t, tZ)" */ template -class OpMult : public OpAlgoBase +class OpMult : public OpAlgoLhsRhsOut { public: /** * Base constructor, should not be used unless explicitly intended. */ - OpMult(); + OpMult() { + + } /** * Default constructor with parameters that provides the bare minimum @@ -44,180 +46,48 @@ class OpMult : public OpAlgoBase OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors); + std::vector>& tensors) + : OpAlgoLhsRhsOut(physicalDevice, device, commandBuffer, tensors) + { + SPDLOG_DEBUG("Kompute OpMult constructor with params"); + +#ifndef RELEASE + this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv"; +#endif + } + +#if RELEASE + /** + * If release it will be using the static version of the shader which is + * loaded using this file directly. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that are to be used in this operation + * @param freeTensors Whether operation manages the memory of the Tensors + */ + std::vector fetchSpirvBinaryData() override + { + SPDLOG_WARN( + "Kompute OpMult Running shaders directly from header"); + + return std::vector( + shader_data::shaders_glsl_opmult_comp_spv, + shader_data::shaders_glsl_opmult_comp_spv + + kp::shader_data::shaders_glsl_opmult_comp_spv_len); + + } +#endif /** * Default destructor, which is in charge of destroying the algorithm * components but does not destroy the underlying tensors */ - ~OpMult(); + ~OpMult() { + SPDLOG_DEBUG("Kompute OpMult destructor started"); + } - /** - * The init function is responsible for ensuring that all of the tensors - * provided are aligned with requirements such as LHS, RHS and Output - * tensors, and creates the algorithm component which processes the - * computation. - */ - void init() override; - - /** - * This records the commands that are to be sent to the GPU. This includes - * the barriers that ensure the memory has been copied before going in and - * out of the shader, as well as the dispatch operation that sends the - * shader processing to the gpu. This function also records the GPU memory - * copy of the output data for the staging bufffer so it can be read by the - * host. - */ - void record() override; - - /** - * Executes after the recorded commands are submitted, and performs a copy - * of the GPU Device memory into the staging buffer so the output data can - * be retrieved. - */ - void postSubmit() override; - - private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mTensorLHS; - std::shared_ptr mTensorRHS; - std::shared_ptr mTensorOutput; - - // -------------- ALWAYS OWNED RESOURCES - std::shared_ptr mTensorOutputStaging; }; } // End namespace kp - -// Including implemenation for template class -#ifndef OPMULT_CPP -#define OPMULT_CPP - -namespace kp { - -template -OpMult::OpMult() -{ - SPDLOG_DEBUG("Kompute OpMult constructor base"); -} - -template -OpMult::OpMult(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector>& tensors) - : OpAlgoBase(physicalDevice, device, commandBuffer, tensors) -{ - SPDLOG_DEBUG("Kompute OpMult constructor with params"); -} - -template -OpMult::~OpMult() -{ - SPDLOG_DEBUG("Kompute OpMult destructor started"); -} - -template -void -OpMult::init() -{ - SPDLOG_DEBUG("Kompute OpMult init called"); - - if (this->mTensors.size() < 3) { - throw std::runtime_error( - "Kompute OpMult called with less than 1 tensor"); - } else if (this->mTensors.size() > 3) { - spdlog::warn("Kompute OpMult called with more than 3 this->mTensors"); - } - - this->mTensorLHS = this->mTensors[0]; - this->mTensorRHS = this->mTensors[1]; - this->mTensorOutput = this->mTensors[2]; - - - // TODO: Explore adding a validate function - if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() && - this->mTensorOutput->isInit())) { - throw std::runtime_error( - "Kompute OpMult all tensor parameters must be initialised. LHS: " + - std::to_string(this->mTensorLHS->isInit()) + - " RHS: " + std::to_string(this->mTensorRHS->isInit()) + - " Output: " + std::to_string(this->mTensorOutput->isInit())); - } - - // TODO: Explore use-cases where tensors shouldn't be the same size, and how - // to deal with those situations - if (!(this->mTensorLHS->size() == this->mTensorRHS->size() && - this->mTensorRHS->size() == this->mTensorOutput->size())) { - throw std::runtime_error( - "Kompute OpMult all tensor parameters must be the same size LHS: " + - std::to_string(this->mTensorLHS->size()) + - " RHS: " + std::to_string(this->mTensorRHS->size()) + - " Output: " + std::to_string(this->mTensorOutput->size())); - } - - this->mTensorOutputStaging = std::make_shared( - this->mTensorOutput->data(), Tensor::TensorTypes::eStaging); - - this->mTensorOutputStaging->init( - this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); - -#if RELEASE - std::vector shaderFileData( - shader_data::shaders_glsl_opmult_comp_spv, - shader_data::shaders_glsl_opmult_comp_spv + - kp::shader_data::shaders_glsl_opmult_comp_spv_len); -#else - this->mOptSpirvBinPath = "shaders/glsl/opmult.comp.spv"; - std::vector& shaderFileData = this->fetchSpirvBinaryData(); -#endif - - SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component"); - - this->mAlgorithm->init(shaderFileData, this->mTensors); -} - -template -void -OpMult::record() -{ - SPDLOG_DEBUG("Kompute OpMult record called"); - - // Barrier to ensure the data is finished writing to buffer memory - this->mTensorLHS->recordBufferMemoryBarrier( - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - this->mTensorRHS->recordBufferMemoryBarrier( - vk::AccessFlagBits::eHostWrite, - vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eHost, - vk::PipelineStageFlagBits::eComputeShader); - - this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); - - // Barrier to ensure the shader code is executed before buffer read - this->mTensorOutput->recordBufferMemoryBarrier( - vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer); - - this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput); -} - -template -void -OpMult::postSubmit() -{ - SPDLOG_DEBUG("Kompute OpMult postSubmit called"); - - this->mTensorOutputStaging->mapDataFromHostMemory(); - - this->mTensorOutput->setData(this->mTensorOutputStaging->data()); -} - -} - -#endif // #ifndef OPMULT_CPP diff --git a/src/include/kompute/shaders/shaderopmult.hpp b/src/include/kompute/shaders/shaderopmult.hpp index cea23cc46..5f970a317 100755 --- a/src/include/kompute/shaders/shaderopmult.hpp +++ b/src/include/kompute/shaders/shaderopmult.hpp @@ -25,7 +25,7 @@ namespace kp { namespace shader_data { static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x08, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -45,19 +45,19 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, - 0x52, 0x48, 0x53, 0x00, 0x05, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, - 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, - 0x6f, 0x72, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x73, 0x52, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, + 0x05, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x4c, 0x68, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x52, 0x68, + 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x68, + 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x4c, 0x45, 0x4e, 0x5f, 0x4c, 0x48, 0x53, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x52, 0x48, 0x53, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x4c, 0x45, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, @@ -67,24 +67,24 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -103,25 +103,25 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x03, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x03, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x1f, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, @@ -130,12 +130,23 @@ static unsigned const char shaders_glsl_opmult_comp_spv[] = { 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 + 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }; -static unsigned const int shaders_glsl_opmult_comp_spv_len = 1320; +static unsigned const int shaders_glsl_opmult_comp_spv_len = 1452; } } #endif // define SHADEROP_SHADEROPMULT_HPP