From 5133ffe5488aac3fb9e381f21a8ce47044fac79c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 7 Nov 2020 16:52:56 +0000 Subject: [PATCH 01/11] Added automated generated documentation --- python/src/docstrings.hpp | 846 ++++++++++++++++++++++++++++++++++++++ python/src/main.cpp | 10 +- 2 files changed, 852 insertions(+), 4 deletions(-) create mode 100644 python/src/docstrings.hpp diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp new file mode 100644 index 000000000..37f3ff785 --- /dev/null +++ b/python/src/docstrings.hpp @@ -0,0 +1,846 @@ +/* + This file contains docstrings for use in the Python bindings. + Do not edit! They were automatically extracted by pybind11_mkdoc. + */ + +#define __EXPAND(x) x +#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT +#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1)) +#define __CAT1(a, b) a ## b +#define __CAT2(a, b) __CAT1(a, b) +#define __DOC1(n1) __doc_##n1 +#define __DOC2(n1, n2) __doc_##n1##_##n2 +#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3 +#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4 +#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5 +#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 +#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 +#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) + +#if defined(__GNUG__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + + +static const char *__doc_kp_Algorithm = +R"doc(Abstraction for compute shaders that are run on top of tensors grouped +via ParameterGroups (which group descriptorsets))doc"; + +static const char *__doc_kp_Algorithm_Algorithm = +R"doc(Base constructor for Algorithm. 
Should not be used unless explicitly +intended.)doc"; + +static const char *__doc_kp_Algorithm_Algorithm_2 = +R"doc(Default constructor for Algorithm + +@param device The Vulkan device to use for creating resources @param +commandBuffer The vulkan command buffer to bind the pipeline and +shaders)doc"; + +static const char *__doc_kp_Algorithm_createDescriptorPool = R"doc()doc"; + +static const char *__doc_kp_Algorithm_createParameters = R"doc()doc"; + +static const char *__doc_kp_Algorithm_createPipeline = R"doc()doc"; + +static const char *__doc_kp_Algorithm_createShaderModule = R"doc()doc"; + +static const char *__doc_kp_Algorithm_init = +R"doc(Initialiser for the shader data provided to the algorithm as well as +tensor parameters that will be used in shader. + +@param shaderFileData The bytes in spir-v format of the shader +@param tensorParams The Tensors to be used in the Algorithm / shader for +processing)doc"; + +static const char *__doc_kp_Algorithm_mCommandBuffer = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mDescriptorPool = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mDescriptorSet = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mDescriptorSetLayout = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mDevice = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreeDescriptorPool = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreeDescriptorSet = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreeDescriptorSetLayout = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreePipeline = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreePipelineCache = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreePipelineLayout = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mFreeShaderModule = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mPipeline = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mPipelineCache = R"doc()doc"; + +static const char *__doc_kp_Algorithm_mPipelineLayout = 
R"doc()doc"; + +static const char *__doc_kp_Algorithm_mShaderModule = R"doc()doc"; + +static const char *__doc_kp_Algorithm_recordDispatch = +R"doc(Records the dispatch function with the provided template parameters or +alternatively using the size of the tensor by default. + +@param x Layout X dispatch value @param y Layout Y dispatch value +@param z Layout Z dispatch value)doc"; + +static const char *__doc_kp_Manager = +R"doc(Base orchestrator which creates and manages device and child +components)doc"; + +static const char *__doc_kp_Manager_Manager = +R"doc(Base constructor and default used which creates the base resources +including choosing the device 0 by default.)doc"; + +static const char *__doc_kp_Manager_Manager_2 = +R"doc(Similar to base constructor but allows the user to provide the device +they would like to create the resources on. + +@param physicalDeviceIndex The index of the physical device to use +@param familyQueueIndices (Optional) List of queue indices to add for +explicit allocation @param totalQueues The total number of compute +queues to create.)doc"; + +static const char *__doc_kp_Manager_Manager_3 = +R"doc(Manager constructor which allows your own vulkan application to +integrate with the vulkan kompute use. + +@param instance Vulkan compute instance to base this application +@param physicalDevice Vulkan physical device to use for application +@param device Vulkan logical device to use for all base resources +@param physicalDeviceIndex Index for vulkan physical device used)doc"; + +static const char *__doc_kp_Manager_buildTensor = +R"doc(Function that simplifies the common workflow of tensor creation and +initialization. It will take the constructor parameters for a Tensor +and will use it to create a new Tensor and then create it using +the OpTensorCreate command. 
+ +@param data The data to initialize the tensor with @param tensorType +The type of tensor to initialize @returns Initialized Tensor with +memory Syncd to GPU device)doc"; + +static const char *__doc_kp_Manager_createDevice = R"doc()doc"; + +static const char *__doc_kp_Manager_createInstance = R"doc()doc"; + +static const char *__doc_kp_Manager_createManagedSequence = +R"doc(Create a new managed Kompute sequence so it's available within the +manager. + +@param sequenceName The name for the named sequence to be created, if +empty then default indexed value is used @param queueIndex The queue +to use from the available queues @return Weak pointer to the manager +owned sequence resource)doc"; + +static const char *__doc_kp_Manager_evalOp = +R"doc(Function that evaluates operation against named sequence. + +@param tensors The tensors to be used in the operation recorded @param +sequenceName The name of the sequence to be retrieved or created +@param TArgs Template parameters that will be used to initialise +Operation to allow for extensible configurations on initialisation)doc"; + +static const char *__doc_kp_Manager_evalOpAsync = +R"doc(Function that evaluates operation against named sequence +asynchronously. + +@param tensors The tensors to be used in the operation recorded @param +sequenceName The name of the sequence to be retrieved or created +@param params Template parameters that will be used to initialise +Operation to allow for extensible configurations on initialisation)doc"; + +static const char *__doc_kp_Manager_evalOpAsyncDefault = +R"doc(Operation that evaluates operation against default sequence +asynchronously. + +@param tensors The tensors to be used in the operation recorded @param +params Template parameters that will be used to initialise Operation +to allow for extensible configurations on initialisation)doc"; + +static const char *__doc_kp_Manager_evalOpAwait = +R"doc(Operation that awaits for named sequence to finish. 
+ +@param sequenceName The name of the sequence to wait for termination +@param waitFor The amount of time to wait before timing out)doc"; + +static const char *__doc_kp_Manager_evalOpAwaitDefault = +R"doc(Operation that awaits for default sequence to finish. + +@param tensors The tensors to be used in the operation recorded @param +params Template parameters that will be used to initialise Operation +to allow for extensible configurations on initialisation)doc"; + +static const char *__doc_kp_Manager_evalOpDefault = +R"doc(Function that evaluates operation against a newly created sequence. + +@param tensors The tensors to be used in the operation recorded @param +TArgs Template parameters that will be used to initialise Operation to +allow for extensible configurations on initialisation)doc"; + +static const char *__doc_kp_Manager_getOrCreateManagedSequence = +R"doc(Get or create a managed Sequence that will be contained by this +manager. If the named sequence does not currently exist, it would be +created and initialised. 
+ +@param sequenceName The name for the named sequence to be retrieved or +created @return Shared pointer to the manager owned sequence resource)doc"; + +static const char *__doc_kp_Manager_mComputeQueueFamilyIndices = R"doc()doc"; + +static const char *__doc_kp_Manager_mComputeQueues = R"doc()doc"; + +static const char *__doc_kp_Manager_mCurrentSequenceIndex = R"doc()doc"; + +static const char *__doc_kp_Manager_mDevice = R"doc()doc"; + +static const char *__doc_kp_Manager_mFreeDevice = R"doc()doc"; + +static const char *__doc_kp_Manager_mFreeInstance = R"doc()doc"; + +static const char *__doc_kp_Manager_mInstance = R"doc()doc"; + +static const char *__doc_kp_Manager_mManagedSequences = R"doc()doc"; + +static const char *__doc_kp_Manager_mPhysicalDevice = R"doc()doc"; + +static const char *__doc_kp_Manager_mPhysicalDeviceIndex = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase = +R"doc(Operation that provides a general abstraction that simplifies the use +of algorithm and parameter components which can be used with shaders. +By default it enables the user to provide a dynamic number of tensors +which are then passed as inputs.)doc"; + +static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_x = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_y = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_KomputeWorkgroup_z = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_OpAlgoBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; + +static const char *__doc_kp_OpAlgoBase_OpAlgoBase_2 = +R"doc(Default constructor with parameters that provides the bare minimum +requirements for the operations to be able to create and manage their +sub-components. 
+ +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +shaderFilePath Optional parameter to specify the shader to load +(either in spirv or raw format) @param komputeWorkgroup Optional +parameter to specify the layout for processing)doc"; + +static const char *__doc_kp_OpAlgoBase_OpAlgoBase_3 = +R"doc(Constructor that enables a file to be passed to the operation with the +contents of the shader. This can be either in raw format or in +compiled SPIR-V binary format. + +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +shaderFilePath Parameter to specify the shader to load (either in +spirv or raw format) @param komputeWorkgroup Optional parameter to +specify the layout for processing)doc"; + +static const char *__doc_kp_OpAlgoBase_OpAlgoBase_4 = +R"doc(Constructor that enables raw shader data to be passed to the main +operation which can be either in raw shader glsl code or in compiled +SPIR-V binary. 
+ +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +shaderDataRaw Optional parameter to specify the shader data either in +binary or raw form @param komputeWorkgroup Optional parameter to +specify the layout for processing)doc"; + +static const char *__doc_kp_OpAlgoBase_fetchSpirvBinaryData = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_init = +R"doc(The init function is responsible for the initialisation of the +algorithm component based on the parameters specified, and allows for +extensibility on the options provided. Further dependent classes can +perform more specific checks such as ensuring tensors provided are +initialised, etc.)doc"; + +static const char *__doc_kp_OpAlgoBase_mAlgorithm = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_mFreeAlgorithm = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_mKomputeWorkgroup = R"doc()doc"; + +static const char *__doc_kp_OpAlgoBase_mShaderDataRaw = +R"doc(< Optional member variable which can be provided to contain either the +raw shader content or the spirv binary content)doc"; + +static const char *__doc_kp_OpAlgoBase_mShaderFilePath = +R"doc(< Optional member variable which can be provided for the OpAlgoBase to +find the data automatically and load for processing)doc"; + +static const char *__doc_kp_OpAlgoBase_postEval = +R"doc(Executes after the recorded commands are submitted, and performs a +copy of the GPU Device memory into the staging buffer so the output +data can be retrieved.)doc"; + +static const char *__doc_kp_OpAlgoBase_preEval = R"doc(Does not perform any preEval commands.)doc"; + +static const char *__doc_kp_OpAlgoBase_record = +R"doc(This records the commands that are to be sent to the GPU. 
This +includes the barriers that ensure the memory has been copied before +going in and out of the shader, as well as the dispatch operation that +sends the shader processing to the gpu. This function also records the +GPU memory copy of the output data for the staging buffer so it can be +read by the host.)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut = +R"doc(Operation base class to simplify the creation of operations that +require right hand and left hand side datapoints together with a +single output. The expected data passed is two input tensors and one +output tensor.)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_OpAlgoLhsRhsOut = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_OpAlgoLhsRhsOut_2 = +R"doc(Default constructor with parameters that provides the bare minimum +requirements for the operations to be able to create and manage their +sub-components. + +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +freeTensors Whether operation manages the memory of the Tensors @param +komputeWorkgroup Optional parameter to specify the layout for +processing)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_init = +R"doc(The init function is responsible for ensuring that all of the tensors +provided are aligned with requirements such as LHS, RHS and Output +tensors, and creates the algorithm component which processes the +computation.)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorLHS = +R"doc(< Reference to the parameter used in the left hand side equation of +the shader)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutput = +R"doc(< Reference to the parameter used in the output of the shader and will +be copied with a staging 
vector)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorOutputStaging = R"doc(< Staging temporary tensor used to copy the output of the tensor)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_mTensorRHS = +R"doc(< Reference to the parameter used in the right hand side equation of +the shader)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_postEval = +R"doc(Executes after the recorded commands are submitted, and performs a +copy of the GPU Device memory into the staging buffer so the output +data can be retrieved.)doc"; + +static const char *__doc_kp_OpAlgoLhsRhsOut_record = +R"doc(This records the commands that are to be sent to the GPU. This +includes the barriers that ensure the memory has been copied before +going in and out of the shader, as well as the dispatch operation that +sends the shader processing to the gpu. This function also records the +GPU memory copy of the output data for the staging buffer so it can be +read by the host.)doc"; + +static const char *__doc_kp_OpBase = +R"doc(Base Operation which provides the high level interface that Kompute +operations implement in order to perform a set of actions in the GPU. + +Operations can perform actions on tensors, and optionally can also own +an Algorithm with respective parameters. kp::Operations with +kp::Algorithms would inherit from kp::OpAlgoBase.)doc"; + +static const char *__doc_kp_OpBase_OpBase = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; + +static const char *__doc_kp_OpBase_OpBase_2 = +R"doc(Default constructor with parameters that provides the bare minimum +requirements for the operations to be able to create and manage their +sub-components. 
+ +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +freeTensors Whether operation manages the memory of the Tensors)doc"; + +static const char *__doc_kp_OpBase_init = +R"doc(The init function is responsible for setting up all the resources and +should be called after the Operation has been created.)doc"; + +static const char *__doc_kp_OpBase_mCommandBuffer = R"doc(< Vulkan Command Buffer)doc"; + +static const char *__doc_kp_OpBase_mDevice = R"doc(< Vulkan Logical Device)doc"; + +static const char *__doc_kp_OpBase_mFreeTensors = +R"doc(< Explicit boolean that specifies whether the < tensors are freed (if +they are managed))doc"; + +static const char *__doc_kp_OpBase_mPhysicalDevice = R"doc(< Vulkan Physical Device)doc"; + +static const char *__doc_kp_OpBase_mTensors = +R"doc(< Tensors referenced by operation that can be managed < optionally by +operation)doc"; + +static const char *__doc_kp_OpBase_postEval = +R"doc(Post eval is called after the Sequence has called eval and submitted +the commands to the GPU for processing, and can be used to perform any +tear-down steps required as the computation iteration finishes. It's +worth noting that there are situations where eval can be called +multiple times, so the resources that are destroyed should not require +a re-init unless explicitly provided by the user.)doc"; + +static const char *__doc_kp_OpBase_preEval = +R"doc(Pre eval is called before the Sequence has called eval and submitted +the commands to the GPU for processing, and can be used to perform any +per-eval setup steps required as the computation iteration begins. 
+It's worth noting that there are situations where eval can be called +multiple times, so the resources that are created should be idempotent +in case it's called multiple times in a row.)doc"; + +static const char *__doc_kp_OpBase_record = +R"doc(The record function is intended to only send a record command or run +commands that are expected to record operations that are to be +submitted as a batch into the GPU.)doc"; + +static const char *__doc_kp_OpMult = +R"doc(Operation that performs multiplication on two tensors and outputs on +third tensor.)doc"; + +static const char *__doc_kp_OpMult_OpMult = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; + +static const char *__doc_kp_OpMult_OpMult_2 = +R"doc(Default constructor with parameters that provides the bare minimum +requirements for the operations to be able to create and manage their +sub-components. + +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that are to be used in this operation @param +komputeWorkgroup Optional parameter to specify the layout for +processing)doc"; + +static const char *__doc_kp_OpTensorCopy = +R"doc(Operation that copies the data from the first tensor to the rest of +the tensors provided, using a record command for all the vectors. This +operation does not own/manage the memory of the tensors passed to it. +The operation must only receive tensors of type)doc"; + +static const char *__doc_kp_OpTensorCopy_OpTensorCopy = R"doc()doc"; + +static const char *__doc_kp_OpTensorCopy_OpTensorCopy_2 = +R"doc(Default constructor with parameters that provides the core vulkan +resources and the tensors that will be used in the operation. 
+ +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that will be used to create in operation.)doc"; + +static const char *__doc_kp_OpTensorCopy_init = +R"doc(Performs basic checks such as ensuring there are at least two tensors +provided, that they are initialised and that they are not of type +TensorTypes::eStorage.)doc"; + +static const char *__doc_kp_OpTensorCopy_postEval = +R"doc(Copies the local vectors for all the tensors to sync the data with the +gpu.)doc"; + +static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc"; + +static const char *__doc_kp_OpTensorCopy_record = +R"doc(Records the copy commands from the first tensor into all the other +tensors provided. Also optionally records a barrier.)doc"; + +static const char *__doc_kp_OpTensorCreate = +R"doc(Operation that creates tensor and manages the memory of the components +created)doc"; + +static const char *__doc_kp_OpTensorCreate_OpTensorCreate = R"doc()doc"; + +static const char *__doc_kp_OpTensorCreate_OpTensorCreate_2 = +R"doc(Default constructor with parameters that provides the bare minimum +requirements for the operations to be able to create and manage their +sub-components. + +@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that will be used to create in operation. +@param freeTensors Whether operation manages the memory of the Tensors)doc"; + +static const char *__doc_kp_OpTensorCreate_init = +R"doc(In charge of initialising the primary Tensor as well as the staging +tensor as required. It will only initialise a staging tensor if the +Primary tensor is of type Device. 
For staging tensors it performs a +mapDataIntoHostMemory which would perform immediately as opposed to on +sequence eval/submission.)doc"; + +static const char *__doc_kp_OpTensorCreate_mStagingTensors = R"doc()doc"; + +static const char *__doc_kp_OpTensorCreate_postEval = +R"doc(Performs a copy back into the main tensor to ensure that the data +contained is the one that is now being stored in the GPU.)doc"; + +static const char *__doc_kp_OpTensorCreate_preEval = R"doc(Does not perform any preEval commands.)doc"; + +static const char *__doc_kp_OpTensorCreate_record = +R"doc(Record runs the core actions to create the tensors. For device tensors +it records a copyCommand to move the data from the staging tensor to +the device tensor. The mapping for staging tensors happens in the init +function not in the record function.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice = +R"doc(Operation that syncs tensor's device by mapping local data into the +device memory. For TensorTypes::eDevice it will use a staging tensor +to perform the copy. For TensorTypes::eStaging it will only copy the +data and perform a map, which will be executed during the record (as +opposed to during the sequence eval/submit). This function cannot be +carried out for TensorTypes::eStorage.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc()doc"; + +static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice_2 = +R"doc(Default constructor with parameters that provides the core vulkan +resources and the tensors that will be used in the operation. The +tensors provided cannot be of type TensorTypes::eStorage. 
+@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that will be used to create in operation.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_init = +R"doc(Performs basic checks such as ensuring that there is at least one +tensor provided, that they are initialized and that they are not of +type TensorTypes::eStorage. For staging tensors in host memory, the map +is performed during the init function.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_mStagingTensors = R"doc()doc"; + +static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_record = +R"doc(For device tensors, it records the copy command to the device tensor +from the temporary staging tensor.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal = +R"doc(Operation that syncs tensor's local data by mapping the data from +device memory into the local vector. For TensorTypes::eDevice it will +use a staging tensor to perform the copy. For TensorTypes::eStaging it +will only copy the data and perform a map, which will be executed +during the postSubmit (there will be no copy during the sequence +eval/submit). This function cannot be carried out for +TensorTypes::eStorage.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc()doc"; + +static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal_2 = +R"doc(Default constructor with parameters that provides the core vulkan +resources and the tensors that will be used in the operation. The +tensors provided cannot be of type TensorTypes::eStorage. 
+@param physicalDevice Vulkan physical device used to find device +queues @param device Vulkan logical device for passing to Algorithm +@param commandBuffer Vulkan Command Buffer to record commands into +@param tensors Tensors that will be used to create in operation.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_init = +R"doc(Performs basic checks such as ensuring that there is at least one +tensor provided, that they are initialized and that they are not of +type TensorTypes::eStorage.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_mStagingTensors = R"doc()doc"; + +static const char *__doc_kp_OpTensorSyncLocal_postEval = +R"doc(For host tensors it performs the map command from the host memory into +local memory.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_record = +R"doc(For device tensors, it records the copy command into the staging +tensor from the device tensor.)doc"; + +static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc"; + +static const char *__doc_kp_Sequence_Sequence = +R"doc(Base constructor for Sequence. Should not be used unless explicitly +intended.)doc"; + +static const char *__doc_kp_Sequence_Sequence_2 = +R"doc(Main constructor for sequence which requires core vulkan components to +generate all dependent resources. + +@param physicalDevice Vulkan physical device @param device Vulkan +logical device @param computeQueue Vulkan compute queue @param +queueIndex Vulkan compute queue index in device)doc"; + +static const char *__doc_kp_Sequence_begin = +R"doc(Begins recording commands for commands to be submitted into the +command buffer. 
+ +@return Boolean stating whether execution was successful.)doc"; + +static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc"; + +static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc"; + +static const char *__doc_kp_Sequence_end = +R"doc(Ends the recording and stops recording commands when the record +command is sent. + +@return Boolean stating whether execution was successful.)doc"; + +static const char *__doc_kp_Sequence_eval = +R"doc(Eval sends all the recorded and stored operations in the vector of +operations into the gpu as a submit job with a barrier. + +@return Boolean stating whether execution was successful.)doc"; + +static const char *__doc_kp_Sequence_evalAsync = +R"doc(Eval Async sends all the recorded and stored operations in the vector +of operations into the gpu as a submit job with a barrier. EvalAwait() +must be called after to ensure the sequence is terminated correctly. + +@return Boolean stating whether execution was successful.)doc"; + +static const char *__doc_kp_Sequence_evalAwait = +R"doc(Eval Await waits for the fence to finish processing and then once it +finishes, it runs the postEval of all operations. + +@param waitFor Number of milliseconds to wait before timing out. +@return Boolean stating whether execution was successful.)doc"; + +static const char *__doc_kp_Sequence_freeMemoryDestroyGPUResources = +R"doc(Destroys and frees the GPU resources which include the buffer and +memory and sets the sequence as init=False.)doc"; + +static const char *__doc_kp_Sequence_init = +R"doc(Initialises sequence including the creation of the command pool and +the command buffer.)doc"; + +static const char *__doc_kp_Sequence_isInit = +R"doc(Returns true if the sequence has been successfully initialised. + +@return Boolean stating if sequence has been initialised.)doc"; + +static const char *__doc_kp_Sequence_isRecording = +R"doc(Returns true if the sequence is currently in recording activated. 
+ +@return Boolean stating if recording ongoing.)doc"; + +static const char *__doc_kp_Sequence_isRunning = +R"doc(Returns true if the sequence is currently running - mostly used for +async workloads. + +@return Boolean stating if currently running.)doc"; + +static const char *__doc_kp_Sequence_mCommandBuffer = R"doc()doc"; + +static const char *__doc_kp_Sequence_mCommandPool = R"doc()doc"; + +static const char *__doc_kp_Sequence_mComputeQueue = R"doc()doc"; + +static const char *__doc_kp_Sequence_mDevice = R"doc()doc"; + +static const char *__doc_kp_Sequence_mFence = R"doc()doc"; + +static const char *__doc_kp_Sequence_mFreeCommandBuffer = R"doc()doc"; + +static const char *__doc_kp_Sequence_mFreeCommandPool = R"doc()doc"; + +static const char *__doc_kp_Sequence_mIsInit = R"doc()doc"; + +static const char *__doc_kp_Sequence_mIsRunning = R"doc()doc"; + +static const char *__doc_kp_Sequence_mOperations = R"doc()doc"; + +static const char *__doc_kp_Sequence_mPhysicalDevice = R"doc()doc"; + +static const char *__doc_kp_Sequence_mQueueIndex = R"doc()doc"; + +static const char *__doc_kp_Sequence_mRecording = R"doc()doc"; + +static const char *__doc_kp_Sequence_record = +R"doc(Record function for operation to be added to the GPU queue in batch. +This template requires classes to be derived from the OpBase class. +This function also requires the Sequence to be recording, otherwise it +will not be able to add the operation. + +@param tensors Vector of tensors to use for the operation @param TArgs +Template parameters that are used to initialise operation which allows +for extensible configurations on initialisation.)doc"; + +static const char *__doc_kp_Tensor = +R"doc(Structured data used in GPU operations. + +Tensors are the base building block in Kompute to perform operations +across GPUs. Each tensor would have a respective Vulkan memory and +buffer, which would be used to store their respective data. 
The +tensors can be used for GPU data storage or transfer.)doc"; + +static const char *__doc_kp_Tensor_Tensor = R"doc(Base constructor, should not be used unless explicitly intended.)doc"; + +static const char *__doc_kp_Tensor_Tensor_2 = +R"doc(Default constructor with data provided which would be used to create +the respective vulkan buffer and memory. + +@param data Vector of data that will be used by the tensor @param +tensorType Type for the tensor which is of type TensorTypes)doc"; + +static const char *__doc_kp_Tensor_TensorTypes = +R"doc(Type for tensors created: Device allows memory to be transferred from +staging buffers. Staging are host memory visible. Storage are device +visible but are not set up to transfer or receive data (only for +shader storage).)doc"; + +static const char *__doc_kp_Tensor_TensorTypes_eDevice = R"doc(< Type is device memory, source and destination)doc"; + +static const char *__doc_kp_Tensor_TensorTypes_eStaging = R"doc(< Type is host memory, source and destination)doc"; + +static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc"; + +static const char *__doc_kp_Tensor_constructDescriptorBufferInfo = +R"doc(Constructs a vulkan descriptor buffer info which can be used to +specify and reference the underlying buffer component of the tensor +without exposing it. + +@return Descriptor buffer info with own buffer)doc"; + +static const char *__doc_kp_Tensor_createBuffer = R"doc()doc"; + +static const char *__doc_kp_Tensor_data = +R"doc(Returns the vector of data currently contained by the Tensor. It is +important to ensure that there is no out-of-sync data with the GPU +memory. 
+ +@return Reference to vector of elements representing the data in the +tensor.)doc"; + +static const char *__doc_kp_Tensor_freeMemoryDestroyGPUResources = +R"doc(Destroys and frees the GPU resources which include the buffer and +memory.)doc"; + +static const char *__doc_kp_Tensor_getBufferUsageFlags = R"doc()doc"; + +static const char *__doc_kp_Tensor_getMemoryPropertyFlags = R"doc()doc"; + +static const char *__doc_kp_Tensor_init = +R"doc(Initialiser which calls the initialisation for all the respective +tensors as well as creates the respective staging tensors. The staging +tensors would only be created for the tensors of type +TensorType::eDevice as otherwise there is no need to copy from host +memory.)doc"; + +static const char *__doc_kp_Tensor_isInit = +R"doc(Returns true if the tensor initialisation function has been carried +out successful, which would mean that the buffer and memory will have +been provisioned.)doc"; + +static const char *__doc_kp_Tensor_mBuffer = R"doc()doc"; + +static const char *__doc_kp_Tensor_mData = R"doc()doc"; + +static const char *__doc_kp_Tensor_mDevice = R"doc()doc"; + +static const char *__doc_kp_Tensor_mFreeBuffer = R"doc()doc"; + +static const char *__doc_kp_Tensor_mFreeMemory = R"doc()doc"; + +static const char *__doc_kp_Tensor_mIsInit = R"doc()doc"; + +static const char *__doc_kp_Tensor_mMemory = R"doc()doc"; + +static const char *__doc_kp_Tensor_mPhysicalDevice = R"doc()doc"; + +static const char *__doc_kp_Tensor_mShape = R"doc()doc"; + +static const char *__doc_kp_Tensor_mTensorType = R"doc()doc"; + +static const char *__doc_kp_Tensor_mapDataFromHostMemory = +R"doc(Maps data from the Host Visible GPU memory into the data vector. It +requires the Tensor to be of staging type for it to work.)doc"; + +static const char *__doc_kp_Tensor_mapDataIntoHostMemory = +R"doc(Maps data from the data vector into the Host Visible GPU memory. 
It +requires the tensor to be of staging type for it to work.)doc"; + +static const char *__doc_kp_Tensor_memorySize = R"doc()doc"; + +static const char *__doc_kp_Tensor_operator_array = +R"doc(Overrides the subscript operator to expose the underlying data's +subscript operator which in this case would be its underlying +vector's. + +@param i The index where the element will be returned from. @return +Returns the element in the position requested.)doc"; + +static const char *__doc_kp_Tensor_recordBufferMemoryBarrier = +R"doc(Records the buffer memory barrier into the command buffer which +ensures that relevant data transfers are carried out correctly. + +@param commandBuffer Vulkan Command Buffer to record the commands into +@param srcAccessMask Access flags for source access mask @param +dstAccessMask Access flags for destination access mask @param +srcStageMask Pipeline stage flags for source stage mask @param +dstStageMask Pipeline stage flags for destination stage mask)doc"; + +static const char *__doc_kp_Tensor_recordCopyFrom = +R"doc(Records a copy from the memory of the tensor provided to the current +tensor. This is intended to pass memory into a processing, to perform +a staging buffer transfer, or to gather output (among others). + +@param commandBuffer Vulkan Command Buffer to record the commands into +@param copyFromTensor Tensor to copy the data from @param +createBarrier Whether to create a barrier that ensures the data is +copied before further operations. Default is true.)doc"; + +static const char *__doc_kp_Tensor_setData = +R"doc(Sets / resets the vector data of the tensor. This function does not +perform any copies into GPU memory and is only performed on the host.)doc"; + +static const char *__doc_kp_Tensor_shape = +R"doc(Returns the shape of the tensor, which includes the number of +dimensions and the size per dimension. + +@return Array containing the sizes for each dimension.
Zero means +respective dimension is not active.)doc"; + +static const char *__doc_kp_Tensor_size = +R"doc(Returns the size/magnitude of the Tensor, which will be the total +number of elements across all dimensions + +@return Unsigned integer representing the total number of elements)doc"; + +static const char *__doc_kp_Tensor_tensorType = +R"doc(Retrieve the tensor type of the Tensor + +@return Tensor type of tensor)doc"; + +#if defined(__GNUG__) +#pragma GCC diagnostic pop +#endif + diff --git a/python/src/main.cpp b/python/src/main.cpp index e50ec7945..f368d77ae 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -3,6 +3,8 @@ #include +#include "docstrings.hpp" + namespace py = pybind11; PYBIND11_MODULE(kp, m) { @@ -21,22 +23,22 @@ PYBIND11_MODULE(kp, m) { #endif }); - py::enum_(m, "TensorTypes", "Enum with GPU memory types for Tensor.") + py::enum_(m, "TensorTypes", DOC(kp, Tensor, TensorTypes)) .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") .value("staging", kp::Tensor::TensorTypes::eStaging, "Tensor used for transfer of data to device.") .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.") .export_values(); - py::class_>(m, "Tensor", "Structured data used in GPU operations.") + py::class_>(m, "Tensor", DOC(kp, Tensor)) .def(py::init( [](const std::vector& data) { return std::unique_ptr(new kp::Tensor(data)); - }), "Initialiser with only list of data components.") + }), DOC(kp, Tensor, Tensor, 2)) .def(py::init( [](const std::vector& data, kp::Tensor::TensorTypes tensorTypes) { return std::unique_ptr(new kp::Tensor(data, tensorTypes)); }), "Initialiser with list of data components and tensor GPU memory type.") - .def("data", &kp::Tensor::data, "Retrieves the data as a list containing the local Tensor memory data.") + .def("data", &kp::Tensor::data, DOC(kp, Tensor, data)) .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor 
memory.") .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.") .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.") From a4523338be45d78806f1a5e35cdc2a1a69e9b169 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sat, 7 Nov 2020 18:42:51 +0000 Subject: [PATCH 02/11] Updated python function to be updated to py::bytes --- python/src/main.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index f368d77ae..52b39eb9a 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -72,7 +72,7 @@ PYBIND11_MODULE(kp, m) { "Records operation to run multiplication compute shader to two input tensors and an output tensor") .def("record_algo_file", &kp::Sequence::record, "Records an operation using a custom shader provided from a shader path") - .def("record_algo_data", &kp::Sequence::record>, + .def("record_algo_data", &kp::Sequence::record, "Records an operation using a custom shader provided as raw string or spirv bytes") .def("record_algo_lro", &kp::Sequence::record, "Records operation to run left right out operation with custom shader"); @@ -112,7 +112,7 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with new anonymous Sequence") .def("eval_algo_file_def", &kp::Manager::evalOpDefault, "Evaluates an operation using a custom shader provided from a shader path with new anonymous Sequence") - .def("eval_algo_data_def", &kp::Manager::evalOpDefault>, + .def("eval_algo_data_def", &kp::Manager::evalOpDefault, "Evaluates an operation using a custom shader provided as raw string or spirv bytes with new anonymous Sequence") .def("eval_algo_lro_def", &kp::Manager::evalOpDefault, "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") @@ -129,7 +129,7 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run 
multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") .def("eval_algo_file", &kp::Manager::evalOp, "Evaluates an operation using a custom shader provided from a shader path with explicitly named Sequence") - .def("eval_algo_data", &kp::Manager::evalOp>, + .def("eval_algo_data", &kp::Manager::evalOp, "Evaluates an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence") .def("eval_algo_lro", &kp::Manager::evalOp, "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") @@ -146,7 +146,7 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with anonymous Sequence") .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously an operation using a custom shader provided from a shader path with anonymous Sequence") - .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault>, + .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence") .def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") @@ -163,7 +163,7 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") .def("eval_async_algo_file", &kp::Manager::evalOpAsync, "Evaluates asynchronously an operation using a custom shader provided from a shader path with explicitly named Sequence") - .def("eval_async_algo_data", &kp::Manager::evalOpAsync>, + .def("eval_async_algo_data", &kp::Manager::evalOpAsync, "Evaluates asynchronously an operation using a custom shader provided as raw 
string or spirv bytes with explicitly named Sequence") .def("eval_async_algo_lro", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to run left right out operation with custom shader with explicitly named Sequence"); From 6c6132942247bcc7a04144b9f29e12eacc58b74e Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 11:20:12 +0000 Subject: [PATCH 03/11] Updated to add separate bytes load and str load functions --- python/src/main.cpp | 76 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 52b39eb9a..265df8f52 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -72,7 +72,17 @@ PYBIND11_MODULE(kp, m) { "Records operation to run multiplication compute shader to two input tensors and an output tensor") .def("record_algo_file", &kp::Sequence::record, "Records an operation using a custom shader provided from a shader path") - .def("record_algo_data", &kp::Sequence::record, + .def("record_algo_data", [](kp::Sequence &self, + std::vector> tensors, + py::bytes &bytes) { + // Bytes have to be converted into std::vector + py::buffer_info info(py::buffer(bytes).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + self.record( + tensors, + std::vector(data, data + length)); + }, "Records an operation using a custom shader provided as raw string or spirv bytes") .def("record_algo_lro", &kp::Sequence::record, "Records operation to run left right out operation with custom shader"); @@ -112,8 +122,20 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with new anonymous Sequence") .def("eval_algo_file_def", &kp::Manager::evalOpDefault, "Evaluates an operation using a custom shader provided from a shader path with new anonymous Sequence") - .def("eval_algo_data_def", &kp::Manager::evalOpDefault, - "Evaluates an 
operation using a custom shader provided as raw string or spirv bytes with new anonymous Sequence") + .def("eval_algo_str_def", &kp::Manager::evalOpDefault>, + "Evaluates an operation using a custom shader provided as string provided as list of characters with new anonymous Sequence") + .def("eval_algo_data_def", [](kp::Manager &self, + std::vector> tensors, + py::bytes &bytes) { + // Bytes have to be converted into std::vector + py::buffer_info info(py::buffer(bytes).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + self.evalOpDefault( + tensors, + std::vector(data, data + length)); + }, + "Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence") .def("eval_algo_lro_def", &kp::Manager::evalOpDefault, "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") // eval @@ -129,8 +151,22 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") .def("eval_algo_file", &kp::Manager::evalOp, "Evaluates an operation using a custom shader provided from a shader path with explicitly named Sequence") - .def("eval_algo_data", &kp::Manager::evalOp, - "Evaluates an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence") + .def("eval_algo_str", &kp::Manager::evalOp>, + "Evaluates an operation using a custom shader provided as string provided as list of characters with explicitly named Sequence") + .def("eval_algo_data", [](kp::Manager &self, + std::vector> tensors, + std::string sequenceName, + py::bytes &bytes) { + // Bytes have to be converted into std::vector + py::buffer_info info(py::buffer(bytes).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + self.evalOp( + tensors, + sequenceName, + std::vector(data, data + length)); + }, + 
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence") .def("eval_algo_lro", &kp::Manager::evalOp, "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") // eval async default @@ -146,7 +182,19 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with anonymous Sequence") .def("eval_async_algo_file_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously an operation using a custom shader provided from a shader path with anonymous Sequence") - .def("eval_async_algo_data_def", &kp::Manager::evalOpAsyncDefault, + .def("eval_async_algo_str_def", &kp::Manager::evalOpAsyncDefault>, + "Evaluates Asynchronously an operation using a custom shader provided as string provided as list of characters with new anonymous Sequence") + .def("eval_async_algo_data_def", [](kp::Manager &self, + std::vector> tensors, + py::bytes &bytes) { + // Bytes have to be converted into std::vector + py::buffer_info info(py::buffer(bytes).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + self.evalOpAsyncDefault( + tensors, + std::vector(data, data + length)); + }, "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence") .def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") @@ -163,7 +211,21 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run multiplication compute shader to two input tensors and an output tensor with explicitly named Sequence") .def("eval_async_algo_file", &kp::Manager::evalOpAsync, "Evaluates asynchronously an operation using a custom shader provided from a shader path with explicitly named Sequence") - 
.def("eval_async_algo_data", &kp::Manager::evalOpAsync, + .def("eval_async_algo_str", &kp::Manager::evalOpAsync>, + "Evaluates Asynchronous an operation using a custom shader provided as string provided as list of characters with explicitly named Sequence") + .def("eval_async_algo_data", [](kp::Manager &self, + std::vector> tensors, + std::string sequenceName, + py::bytes &bytes) { + // Bytes have to be converted into std::vector + py::buffer_info info(py::buffer(bytes).request()); + const char *data = reinterpret_cast(info.ptr); + size_t length = static_cast(info.size); + self.evalOpAsync( + tensors, + sequenceName, + std::vector(data, data + length)); + }, "Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with explicitly named Sequence") .def("eval_async_algo_lro", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to run left right out operation with custom shader with explicitly named Sequence"); From 65b52f3023a5c4bed54be4d5c6efea078985e1ad Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 13:18:13 +0000 Subject: [PATCH 04/11] Updated tests to cover str and data load, one of the tests leveraging pyshader --- python/test/requirements-dev.txt | 1 + python/test/test_kompute.py | 31 +++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 python/test/requirements-dev.txt diff --git a/python/test/requirements-dev.txt b/python/test/requirements-dev.txt new file mode 100644 index 000000000..5718a0210 --- /dev/null +++ b/python/test/requirements-dev.txt @@ -0,0 +1 @@ +pyshader==0.7.0 diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 43baf77d1..8c95f1f70 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -49,7 +49,7 @@ def test_opalgobase_data(): mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) - mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], 
list(shaderData)) + mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderData)) mgr.eval_tensor_sync_local_def([tensor_out]) @@ -81,28 +81,43 @@ def test_sequence(): """ Test basic OpAlgoBase operation """ - mgr = Manager(0, [2]) - tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) - seq = mgr.create_sequence("op") - shaderFilePath = "../../shaders/glsl/opmult.comp" mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_await_def() - seq.begin() seq.record_tensor_sync_local([tensor_in_a]) seq.record_tensor_sync_local([tensor_in_b]) seq.record_tensor_sync_local([tensor_out]) seq.end() - seq.eval() + assert tensor_out.data() == [2.0, 4.0, 6.0] + +def test_pyshader_generated(): + from pyshader import python2shader, f32, ivec3, Array + + @python2shader + def compute_shader_multiply(index: ("input", "GlobalInvocationId", ivec3), + data1: ("buffer", 0, Array(f32)), + data2: ("buffer", 1, Array(f32)), + data3: ("buffer", 2, Array(f32))): + i = index.x + data3[i] = data1[i] * data2[i] + + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr = Manager() + + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) + mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] From 9af9cb7a50336a992f6ad9f4e6280eeac0a5ddee Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 15:38:18 +0000 Subject: [PATCH 05/11] Implemented logistic regression in python (naive version without sequence) and added to test --- python/test/test_kompute.py | 109 ++++++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 6 deletions(-) diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 
8c95f1f70..fd6611550 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -1,4 +1,7 @@ +from pyshader import python2shader, f32, ivec3, Array +from pyshader.stdlib import exp, log + from kp import Tensor, Manager, Sequence def test_opmult(): @@ -98,14 +101,13 @@ def test_sequence(): seq.eval() assert tensor_out.data() == [2.0, 4.0, 6.0] -def test_pyshader_generated(): - from pyshader import python2shader, f32, ivec3, Array +def test_pyshader_pyshader(): @python2shader - def compute_shader_multiply(index: ("input", "GlobalInvocationId", ivec3), - data1: ("buffer", 0, Array(f32)), - data2: ("buffer", 1, Array(f32)), - data3: ("buffer", 2, Array(f32))): + def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), + data1=("buffer", 0, Array(f32)), + data2=("buffer", 1, Array(f32)), + data3=("buffer", 2, Array(f32))): i = index.x data3[i] = data1[i] * data2[i] @@ -121,5 +123,100 @@ def test_pyshader_generated(): assert tensor_out.data() == [2.0, 4.0, 6.0] +def test_logistic_regression_pyshader(): + @python2shader + def compute_shader( + index = ("input", "GlobalInvocationId", ivec3), + x_i = ("buffer", 0, Array(f32)), + x_j = ("buffer", 1, Array(f32)), + y = ("buffer", 2, Array(f32)), + w_in = ("buffer", 3, Array(f32)), + w_out_i = ("buffer", 4, Array(f32)), + w_out_j = ("buffer", 5, Array(f32)), + b_in = ("buffer", 6, Array(f32)), + b_out = ("buffer", 7, Array(f32)), + l_out = ("buffer", 8, Array(f32)), + M = ("buffer", 9, Array(f32))): + + i = index.x + + m = M[0] + + w_curr = vec2(w_in[0], w_in[1]) + b_curr = b_in[0] + + x_curr = vec2(x_i[i], x_j[i]) + y_curr = y[i] + + z_dot = w_curr @ x_curr + z = z_dot + b_curr + y_hat = 1.0 / (1.0 + exp(-z)) + + d_z = y_hat - y_curr + d_w = (1.0 / m) * x_curr * d_z + d_b = (1.0 / m) * d_z + + loss = -((y_curr * log(y_hat)) + ((1.0 + y_curr) * log(1.0 - y_hat))) + + w_out_i[i] = d_w.x + w_out_j[i] = d_w.y + b_out[i] = d_b + l_out[i] = loss + + + # First we create input and output tensors for
shader + tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0]) + tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + + tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + + tensor_w_in = Tensor([0.001, 0.001]) + tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_b_in = Tensor([0.0]) + tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_m = Tensor([ 5.0 ]) + + # We store them in an array for easier interaction + params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, + tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] + + mgr = Manager() + + mgr.eval_tensor_create_def(params) + + ITERATIONS = 100 + learning_rate = 0.1 + + # Perform machine learning training and inference across all input X and Y + for i_iter in range(ITERATIONS): + mgr.eval_tensor_sync_device_def([tensor_w_in, tensor_b_in]) + mgr.eval_algo_data_def(params, compute_shader.to_spirv()) + mgr.eval_tensor_sync_local_def([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]) + + # Calculate the parameters based on the respective derivatives calculated + w_in_i_val = tensor_w_in.data()[0] + w_in_j_val = tensor_w_in.data()[1] + b_in_val = tensor_b_in.data()[0] + + for j_iter in range(tensor_b_out.size()): + w_in_i_val -= learning_rate * tensor_w_out_i.data()[j_iter] + w_in_j_val -= learning_rate * tensor_w_out_j.data()[j_iter] + b_in_val -= learning_rate * tensor_b_out.data()[j_iter] + + # Update the parameters to process inference again + tensor_w_in.set_data([w_in_i_val, w_in_j_val]) + tensor_b_in.set_data([b_in_val]) + + assert tensor_w_in.data()[0] < 0.01 + assert tensor_w_in.data()[0] > 0.0 + assert tensor_w_in.data()[1] > 1.5 + assert tensor_b_in.data()[0] < 0.7 + + if __name__ == "__main__": test_sequence() From 2ba3c8eadb7fec89fb8392fde29261729a53ee6b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 15:38:38 +0000 Subject: [PATCH 
06/11] Updated lr cpp test to print without fmt --- test/TestLogisticRegression.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index eda6ca635..c360542b6 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -1,7 +1,6 @@ #include "gtest/gtest.h" -//#include #include "kompute/Kompute.hpp" TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) @@ -73,12 +72,11 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) EXPECT_LT(wIn->data()[0], 0.01); EXPECT_GT(wIn->data()[1], 1.0); EXPECT_LT(bIn->data()[0], 0.0); - EXPECT_LT(bIn->data()[0], 0.0); - // SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", - // wIn->data(), - // bIn->data(), - // lOut->data()); + SPDLOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", + wIn->data()[0], + wIn->data()[1], + bIn->data()[0]); } TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) @@ -156,8 +154,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) EXPECT_GT(wIn->data()[1], 1.0); EXPECT_LT(bIn->data()[0], 0.0); - // SPDLOG_WARN("Result wIn: {}, bIn: {}, loss: {}", - // wIn->data(), - // bIn->data(), - // lOut->data()); + SPDLOG_WARN("Result wIn i: {}, wIn j: {}, bIn: {}", + wIn->data()[0], + wIn->data()[1], + bIn->data()[0]); } From 93e03ae46312e2706c152bfe6b54dec7742c7963 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 15:54:41 +0000 Subject: [PATCH 07/11] Updated function create_sequence to have default param for create_sequence name to empty string --- python/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 265df8f52..59d16abbb 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -99,7 +99,7 @@ PYBIND11_MODULE(kp, m) { }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.") 
.def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name") .def("create_sequence", &kp::Manager::createManagedSequence, - py::arg("name"), py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") + py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") .def("build_tensor", &kp::Manager::buildTensor, py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, "Build and initialise tensor") From 358f496549a8bf9d2102f3fd592bdaee40ebad38 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 15:56:43 +0000 Subject: [PATCH 08/11] Updated python lr impl to use sequence for more efficient management of sequences --- python/test/test_kompute.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index fd6611550..559600eba 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -189,14 +189,20 @@ def test_logistic_regression_pyshader(): mgr.eval_tensor_create_def(params) + # Record commands for efficient evaluation + sq = mgr.create_sequence() + sq.begin() + sq.record_tensor_sync_device([tensor_w_in, tensor_b_in]) + sq.record_algo_data(params, compute_shader.to_spirv()) + sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]) + sq.end() + ITERATIONS = 100 learning_rate = 0.1 # Perform machine learning training and inference across all input X and Y for i_iter in range(ITERATIONS): - mgr.eval_tensor_sync_device_def([tensor_w_in, tensor_b_in]) - mgr.eval_algo_data_def(params, compute_shader.to_spirv()) - mgr.eval_tensor_sync_local_def([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]) + sq.eval() # Calculate the parameters based on the respective derivatives calculated w_in_i_val = tensor_w_in.data()[0] From 
13503e763975c1803d089e7925378c04a22b87c2 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 15:58:43 +0000 Subject: [PATCH 09/11] Removed last line from py tests --- python/test/test_kompute.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 559600eba..ea82799e8 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -223,6 +223,3 @@ def test_logistic_regression_pyshader(): assert tensor_w_in.data()[1] > 1.5 assert tensor_b_in.data()[0] < 0.7 - -if __name__ == "__main__": - test_sequence() From b68446beeb73cc5aac4e6fe2f6483bcbe3112a06 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 16:04:05 +0000 Subject: [PATCH 10/11] Updated readme for python example --- README.md | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 43b3b8511..4facb1137 100644 --- a/README.md +++ b/README.md @@ -306,8 +306,18 @@ tensor_out = Tensor([0, 0, 0]) mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) -shaderFilePath = "shaders/glsl/opmult.comp" -mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) +# Define the function via PyShader or directly as glsl string or spirv bytes +@python2shader +def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), + data1=("buffer", 0, Array(f32)), + data2=("buffer", 1, Array(f32)), + data3=("buffer", 2, Array(f32))): + i = index.x + data3[i] = data1[i] * data2[i] + +# Run shader operation synchronously +mgr.eval_algo_data_def( + [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) # Alternatively can pass raw string/bytes: # shaderFileData = """ shader code here... 
""" @@ -332,13 +342,22 @@ tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) -shaderFilePath = "../../shaders/glsl/opmult.comp" - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) seq = mgr.create_sequence("op") -mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) +# Define the function via PyShader or directly as glsl string or spirv bytes +@python2shader +def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), + data1=("buffer", 0, Array(f32)), + data2=("buffer", 1, Array(f32)), + data3=("buffer", 2, Array(f32))): + i = index.x + data3[i] = data1[i] * data2[i] + +# Run shader operation asynchronously and then await +mgr.eval_async_algo_data_def( + [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) mgr.eval_await_def() seq.begin() From 3b540d00e15bb7af1ec2c41593f186d9dd458099 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Sun, 8 Nov 2020 16:27:01 +0000 Subject: [PATCH 11/11] Updated python package documentation --- docs/overview/python-package.rst | 200 ++++++++++++++++++++++++++++- docs/overview/python-reference.rst | 3 - 2 files changed, 197 insertions(+), 6 deletions(-) diff --git a/docs/overview/python-package.rst b/docs/overview/python-package.rst index ffe5b272e..004f16a56 100644 --- a/docs/overview/python-package.rst +++ b/docs/overview/python-package.rst @@ -9,7 +9,7 @@ Below is a diagram that provides insights on the relationship between Vulkan Kom .. 
image:: ../images/kompute-architecture.jpg :width: 70% -Python Components +Core Python Components ^^^^^^^^ The Python package exposes three main classes: @@ -30,7 +30,89 @@ More specifically, it can be through the following functions: * mgr.eval_async__def - Runs operation asynchronously under a new anonymous sequence * seq.record_ - Records operation in sequence (requires sequence to be in recording mode) -You can see these operations being used in the `Simple Python example `_ and in the `Extended Python Example `_. +Python Example (Simple) +^^^^^^^^^^^^^^^^^^^^^^^ + +Then you can interact with it from your interpreter. Below is the same sample as above "Your First Kompute (Simple Version)" but in Python: + +.. code-block:: python + :linenos: + + mgr = Manager() + + # Can be initialized with List[] or np.Array + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + + # Define the function via PyShader or directly as glsl string or spirv bytes + @python2shader + def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), + data1=("buffer", 0, Array(f32)), + data2=("buffer", 1, Array(f32)), + data3=("buffer", 2, Array(f32))): + i = index.x + data3[i] = data1[i] * data2[i] + + # Run shader operation synchronously + mgr.eval_algo_data_def( + [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) + + # Alternatively can pass raw string/bytes: + # shaderFileData = """ shader code here... """ + # mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderFileData)) + + mgr.eval_await_def() + + mgr.eval_tensor_sync_local_def([tensor_out]) + + assert tensor_out.data() == [2.0, 4.0, 6.0] + + +Python Example (Extended) +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similarly you can find the same extended example as above: + +.. 
code-block:: python + :linenos: + + mgr = Manager(0, [2]) + + # Can be initialized with List[] or np.Array + tensor_in_a = Tensor([2, 2, 2]) + tensor_in_b = Tensor([1, 2, 3]) + tensor_out = Tensor([0, 0, 0]) + + mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + + seq = mgr.create_sequence("op") + + # Define the function via PyShader or directly as glsl string or spirv bytes + @python2shader + def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), + data1=("buffer", 0, Array(f32)), + data2=("buffer", 1, Array(f32)), + data3=("buffer", 2, Array(f32))): + i = index.x + data3[i] = data1[i] * data2[i] + + # Run shader operation asynchronously and then await + mgr.eval_async_algo_data_def( + [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) + mgr.eval_await_def() + + seq.begin() + seq.record_tensor_sync_local([tensor_in_a]) + seq.record_tensor_sync_local([tensor_in_b]) + seq.record_tensor_sync_local([tensor_out]) + seq.end() + + seq.eval() + + assert tensor_out.data() == [2.0, 4.0, 6.0] Kompute Operation Capabilities ^^^^^ @@ -38,7 +120,8 @@ Kompute Operation Capabilities Handling multiple capabilites of processing can be done by compute shaders being loaded into separate sequences. The example below shows how this can be done: .. code-block:: python - :linenos: + :linenos: + from kp import Manager # We'll assume we have the shader data available @@ -77,6 +160,117 @@ Handling multiple capabilites of processing can be done by compute shaders being print(t1.data(), t2.data(), t3.data()) +Machine Learning Logistic Regression Implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to the logistic regression implementation in the C++ examples section, below you can find the Python implementation of the Logistic Regression algorithm. + +.. 
code-block:: python + :linenos: + + @python2shader + def compute_shader( + index = ("input", "GlobalInvocationId", ivec3), + x_i = ("buffer", 0, Array(f32)), + x_j = ("buffer", 1, Array(f32)), + y = ("buffer", 2, Array(f32)), + w_in = ("buffer", 3, Array(f32)), + w_out_i = ("buffer", 4, Array(f32)), + w_out_j = ("buffer", 5, Array(f32)), + b_in = ("buffer", 6, Array(f32)), + b_out = ("buffer", 7, Array(f32)), + l_out = ("buffer", 8, Array(f32)), + M = ("buffer", 9, Array(f32))): + + i = index.x + + m = M[0] + + w_curr = vec2(w_in[0], w_in[1]) + b_curr = b_in[0] + + x_curr = vec2(x_i[i], x_j[i]) + y_curr = y[i] + + z_dot = w_curr @ x_curr + z = z_dot + b_curr + y_hat = 1.0 / (1.0 + exp(-z)) + + d_z = y_hat - y_curr + d_w = (1.0 / m) * x_curr * d_z + d_b = (1.0 / m) * d_z + + loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat))) + + w_out_i[i] = d_w.x + w_out_j[i] = d_w.y + b_out[i] = d_b + l_out[i] = loss + + + # First we create input and output tensors for shader + tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0]) + tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + + tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + + tensor_w_in = Tensor([0.001, 0.001]) + tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_b_in = Tensor([0.0]) + tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) + + tensor_m = Tensor([ 5.0 ]) + + # We store them in an array for easier interaction + params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, + tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] + + mgr = Manager() + + mgr.eval_tensor_create_def(params) + + # Record commands for efficient evaluation + sq = mgr.create_sequence() + sq.begin() + sq.record_tensor_sync_device([tensor_w_in, tensor_b_in]) + sq.record_algo_data(params, compute_shader.to_spirv()) + sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, 
tensor_l_out]) + sq.end() + + ITERATIONS = 100 + learning_rate = 0.1 + + # Perform machine learning training and inference across all input X and Y + for i_iter in range(ITERATIONS): + sq.eval() + + # Calculate the parameters based on the respective derivatives calculated + w_in_i_val = tensor_w_in.data()[0] + w_in_j_val = tensor_w_in.data()[1] + b_in_val = tensor_b_in.data()[0] + + for j_iter in range(tensor_b_out.size()): + w_in_i_val -= learning_rate * tensor_w_out_i.data()[j_iter] + w_in_j_val -= learning_rate * tensor_w_out_j.data()[j_iter] + b_in_val -= learning_rate * tensor_b_out.data()[j_iter] + + # Update the parameters to process inference again + tensor_w_in.set_data([w_in_i_val, w_in_j_val]) + tensor_b_in.set_data([b_in_val]) + + assert tensor_w_in.data()[0] < 0.01 + assert tensor_w_in.data()[0] > 0.0 + assert tensor_w_in.data()[1] > 1.5 + assert tensor_b_in.data()[0] < 0.7 + + # Print outputs + print(tensor_w_in.data()) + print(tensor_b_in.data()) + Package Installation ^^^^^^^^^ diff --git a/docs/overview/python-reference.rst b/docs/overview/python-reference.rst index 0a8eb7a23..89b426ce0 100644 --- a/docs/overview/python-reference.rst +++ b/docs/overview/python-reference.rst @@ -6,9 +6,6 @@ Python Class Documentation & Reference This section provides a breakdown of the Python classes and what each of their functions provide. Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of either CPU and/or GPU memory. -.. image:: ../images/kompute-architecture.jpg - :width: 70% - Manager -------