This commit is contained in:
Alejandro Saucedo 2021-02-28 16:02:37 +00:00
parent 75315db943
commit 63e220a8a4
26 changed files with 667 additions and 624 deletions

View file

@ -12,8 +12,7 @@ namespace kp {
*/
class Algorithm
{
public:
public:
/**
* Default constructor for Algorithm
*
@ -21,12 +20,11 @@ public:
* @param commandBuffer The vulkan command buffer to bind the pipeline and
* shaders
*/
Algorithm(
std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
Algorithm(std::shared_ptr<vk::Device> device,
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
/**
* Initialiser for the shader data provided to the algorithm as well as
@ -34,14 +32,13 @@ public:
*
* @param shaderFileData The bytes in spir-v format of the shader
* @tensorParams The Tensors to be used in the Algorithm / shader for
* @specalizationInstalces The specialization parameters to pass to the function
* processing
* @specalizationInstalces The specialization parameters to pass to the
* function processing
*/
void rebuild(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
/**
* Destructor for Algorithm which is responsible for freeing and desroying
@ -61,7 +58,8 @@ public:
void bindCore(const vk::CommandBuffer& commandBuffer);
void bindPush(const vk::CommandBuffer& commandBuffer, const Constants& pushConstants);
void bindPush(const vk::CommandBuffer& commandBuffer,
const Constants& pushConstants);
bool isInit();
@ -73,7 +71,7 @@ public:
void destroy();
private:
private:
// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::Device> mDevice;
std::vector<std::shared_ptr<Tensor>> mTensors;

View file

@ -60,12 +60,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_DEBUG(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_DEBUG(...) \
((void)__android_log_print(ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#define KP_LOG_DEBUG(...) \
((void)__android_log_print( \
ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_DEBUG(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
#define KP_LOG_DEBUG(...) \
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
__DATE__, \
__TIME__, \
__FILE__, \
__LINE__, \
fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 1
@ -73,12 +80,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_INFO(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_INFO(...) \
((void)__android_log_print(ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#define KP_LOG_INFO(...) \
((void)__android_log_print( \
ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_INFO(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
#define KP_LOG_INFO(...) \
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
__DATE__, \
__TIME__, \
__FILE__, \
__LINE__, \
fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 2
@ -86,12 +100,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_WARN(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_WARN(...) \
((void)__android_log_print(ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#define KP_LOG_WARN(...) \
((void)__android_log_print( \
ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_WARN(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
#define KP_LOG_WARN(...) \
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
__DATE__, \
__TIME__, \
__FILE__, \
__LINE__, \
fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 3
@ -99,12 +120,19 @@ extern py::object kp_debug, kp_info, kp_warning, kp_error;
#define KP_LOG_ERROR(...)
#else
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define KP_LOG_ERROR(...) \
((void)__android_log_print(ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#define KP_LOG_ERROR(...) \
((void)__android_log_print( \
ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__)))
#elif defined(KOMPUTE_BUILD_PYTHON)
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_ERROR(...) fmt::print("[{} {}] [debug] [{}:{}] {}\n", __DATE__, __TIME__, __FILE__, __LINE__, fmt::format(__VA_ARGS__))
#define KP_LOG_ERROR(...) \
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
__DATE__, \
__TIME__, \
__FILE__, \
__LINE__, \
fmt::format(__VA_ARGS__))
#endif // VK_USE_PLATFORM_ANDROID_KHR
#endif // SPDLOG_ACTIVE_LEVEL > 4
#endif // KOMPUTE_SPDLOG_ENABLED

View file

@ -84,10 +84,10 @@ class Manager
Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice);
std::shared_ptr<Algorithm> algorithm(
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
const std::vector<uint32_t>& spirv = {},
const Workgroup& workgroup = {},
const Constants& specializationConstants = {});
void destroy();
void clear();
@ -119,7 +119,8 @@ class Manager
// Create functions
void createInstance();
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {}, uint32_t hysicalDeviceIndex = 0);
void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
uint32_t hysicalDeviceIndex = 0);
};
} // End namespace kp

View file

@ -9,7 +9,7 @@ namespace kp {
/**
* Container of operations that can be sent to GPU as batch
*/
class Sequence: public std::enable_shared_from_this<Sequence>
class Sequence : public std::enable_shared_from_this<Sequence>
{
public:
/**
@ -46,8 +46,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
* which allows for extensible configurations on initialisation.
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
record(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> record(
std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -56,14 +57,13 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->record(op);
}
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
record(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -72,8 +72,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->record(op);
}
@ -96,8 +96,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
*/
// TODO: Aim to have only a single function with tensors/algorithm
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
eval(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -106,16 +106,16 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
// TODO: Aim to be able to handle errors when returning without throw except
// TODO: Aim to be able to handle errors when returning without throw
// except
return this->eval(op);
}
// Needded as otherise can't use initialiser list
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
eval(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -124,8 +124,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->eval(op);
}
@ -147,8 +147,9 @@ class Sequence: public std::enable_shared_from_this<Sequence>
* @return shared_ptr<Sequence> of the Sequence class itself
*/
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
evalAsync(std::vector<std::shared_ptr<Tensor>> tensors, TArgs&&... params)
std::shared_ptr<Sequence> evalAsync(
std::vector<std::shared_ptr<Tensor>> tensors,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -157,15 +158,14 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(tensors, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->evalAsync(op);
}
// Needed as otherwise it's not possible to use initializer lists
template<typename T, typename... TArgs>
std::shared_ptr<Sequence>
evalAsync(std::shared_ptr<Algorithm> algorithm, TArgs&&... params)
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
TArgs&&... params)
{
KP_LOG_DEBUG("Kompute Sequence record function started");
@ -174,8 +174,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
"OpBase derived classes");
KP_LOG_DEBUG("Kompute Sequence creating OpBase derived class instance");
std::shared_ptr<T> op{
new T(algorithm, std::forward<TArgs>(params)...) };
std::shared_ptr<T> op{ new T(algorithm,
std::forward<TArgs>(params)...) };
return this->evalAsync(op);
}
@ -190,7 +190,8 @@ class Sequence: public std::enable_shared_from_this<Sequence>
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
/**
* Clear function clears all operations currently recorded and starts recording again.
* Clear function clears all operations currently recorded and starts
* recording again.
*/
void clear();
@ -217,7 +218,6 @@ class Sequence: public std::enable_shared_from_this<Sequence>
*/
bool isRecording();
bool isInit();
/**

View file

@ -4,9 +4,9 @@
#include <iostream>
#include <vector>
#include <SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Public/ShaderLang.h>
#include <SPIRV/GlslangToSpv.h>
#include "kompute/Core.hpp"
@ -16,161 +16,162 @@ namespace kp {
// Has been adobted by:
// https://github.com/KhronosGroup/glslang/blob/master/StandAlone/ResourceLimits.cpp
const TBuiltInResource defaultResource = {
/* .MaxLights = */ 0,
/* .MaxClipPlanes = */ 0,
/* .MaxTextureUnits = */ 0,
/* .MaxTextureCoords = */ 0,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 0,
/* .MaxCombinedTextureImageUnits = */ 0,
/* .MaxTextureImageUnits = */ 0,
/* .MaxFragmentUniformComponents = */ 0,
/* .MaxDrawBuffers = */ 0,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 0,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 0,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 0,
/* .MaxImageUnits = */ 0,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 0,
/* .MaxCombinedImageUniforms = */ 0,
/* .MaxGeometryTextureImageUnits = */ 0,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 0,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 0,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 0,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
/* .maxDualSourceDrawBuffersEXT = */ 1,
/* .MaxLights = */ 0,
/* .MaxClipPlanes = */ 0,
/* .MaxTextureUnits = */ 0,
/* .MaxTextureCoords = */ 0,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 0,
/* .MaxCombinedTextureImageUnits = */ 0,
/* .MaxTextureImageUnits = */ 0,
/* .MaxFragmentUniformComponents = */ 0,
/* .MaxDrawBuffers = */ 0,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 0,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 0,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 0,
/* .MaxImageUnits = */ 0,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 0,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 0,
/* .MaxCombinedImageUniforms = */ 0,
/* .MaxGeometryTextureImageUnits = */ 0,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 0,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 0,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 0,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
/* .maxDualSourceDrawBuffersEXT = */ 1,
/* .limits = */
{
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}
};
/* .limits = */ {
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}};
/**
Shader utily class with functions to compile and process glsl files.
*/
class Shader {
public:
class Shader
{
public:
/**
* Compile multiple sources with optional filenames. Currently this function
* uses the glslang C++ interface which is not thread safe so this funciton
* should not be called from multiple threads concurrently. If you have a
* online shader processing multithreading use-case that can't use offline
* online shader processing multithreading use-case that can't use offline
* compilation please open an issue.
*
* @param sources A list of raw glsl shaders in string format
* @param files A list of file names respective to each of the sources
* @param entryPoint The function name to use as entry point
* @param definitions List of pairs containing key value definitions
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
* @param resourcesLimit A list that contains the resource limits for the
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_sources(
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
std::vector<std::pair<std::string,std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const std::vector<std::string>& sources,
const std::vector<std::string>& files = {},
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
/**
* Compile a single glslang source from string value. Currently this function
* uses the glslang C++ interface which is not thread safe so this funciton
* should not be called from multiple threads concurrently. If you have a
* online shader processing multithreading use-case that can't use offline
* compilation please open an issue.
* Compile a single glslang source from string value. Currently this
* function uses the glslang C++ interface which is not thread safe so this
* funciton should not be called from multiple threads concurrently. If you
* have a online shader processing multithreading use-case that can't use
* offline compilation please open an issue.
*
* @param source An individual raw glsl shader in string format
* @param entryPoint The function name to use as entry point
* @param definitions List of pairs containing key value definitions
* @param resourcesLimit A list that contains the resource limits for the GLSL compiler
* @param resourcesLimit A list that contains the resource limits for the
* GLSL compiler
* @return The compiled SPIR-V binary in unsigned int32 format
*/
static std::vector<uint32_t> compile_source(
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string,std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
const std::string& source,
const std::string& entryPoint = "main",
std::vector<std::pair<std::string, std::string>> definitions = {},
const TBuiltInResource& resources = defaultResource);
};
}
#endif // DKOMPUTE_DISABLE_SHADER_UTILS

View file

@ -54,7 +54,7 @@ class Tensor
* otherwise there is no need to copy from host memory.
*/
void rebuild(const std::vector<float>& data,
TensorTypes tensorType = TensorTypes::eDevice);
TensorTypes tensorType = TensorTypes::eDevice);
/**
* Destroys and frees the GPU resources which include the buffer and memory.
@ -125,9 +125,8 @@ class Tensor
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromStagingToDevice(
const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
/**
* Records a copy from the internal device memory to the staging memory
@ -138,9 +137,8 @@ class Tensor
* @param createBarrier Whether to create a barrier that ensures the data is
* copied before further operations. Default is true.
*/
void recordCopyFromDeviceToStaging(
const vk::CommandBuffer& commandBuffer,
bool createBarrier);
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer,
bool createBarrier);
/**
* Records the buffer memory barrier into the command buffer which
@ -152,12 +150,11 @@ class Tensor
* @param scrStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordBufferMemoryBarrier(
const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Constructs a vulkan descriptor buffer info which can be used to specify
@ -205,11 +202,11 @@ class Tensor
std::shared_ptr<vk::DeviceMemory> memory,
vk::MemoryPropertyFlags memoryPropertyFlags);
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
std::shared_ptr<vk::Buffer> bufferFrom,
std::shared_ptr<vk::Buffer> bufferTo,
vk::DeviceSize bufferSize,
vk::BufferCopy copyRegion,
bool createBarrier);
// Private util functions
vk::BufferUsageFlags getPrimaryBufferUsageFlags();