diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index bf98e6581..a5bda0a4d 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -247,8 +247,10 @@ static const char *__doc_kp_Manager_sequence = R"doc(Create a managed sequence that will be destroyed by this manager if it hasn't been destroyed by its reference count going to zero. -@param queueIndex The queue to use from the available queues @returns -Shared pointer with initialised sequence)doc"; +@param queueIndex The queue to use from the available queues @param +nrOfTimestamps The maximum number of timestamps to allocate. If zero +(default), disables latching of timestamps. @returns Shared pointer +with initialised sequence)doc"; static const char *__doc_kp_Manager_tensor = R"doc(Create a managed tensor that will be destroyed by this manager if it @@ -264,18 +266,26 @@ of algorithm and parameter components which can be used with shaders. By default it enables the user to provide a dynamic number of tensors which are then passed as inputs.)doc"; -static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = R"doc()doc"; +static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch = +R"doc(Constructor that stores the algorithm to use as well as the relevant +push constants to override when recording. + +@param algorithm The algorithm object to use for dispatch @param +pushConstants The push constants to use for override)doc"; static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc"; static const char *__doc_kp_OpAlgoDispatch_postEval = -R"doc(Executes after the recorded commands are submitted, and performs a -copy of the GPU Device memory into the staging buffer so the output -data can be retrieved.)doc"; +R"doc(Does not perform any postEval commands. 
-static const char *__doc_kp_OpAlgoDispatch_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpAlgoDispatch_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpAlgoDispatch_record = R"doc(This records the commands that are to be sent to the GPU. This @@ -283,7 +293,9 @@ includes the barriers that ensure the memory has been copied before going in and out of the shader, as well as the dispatch operation that sends the shader processing to the gpu. This function also records the GPU memory copy of the output data for the staging buffer so it can be -read by the host.)doc"; +read by the host. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase = R"doc(Base Operation which provides the high level interface that Kompute @@ -299,7 +311,9 @@ the commands to the GPU for processing, and can be used to perform any tear-down steps required as the computation iteration finishes. It's worth noting that there are situations where eval can be called multiple times, so the resources that are destroyed should not require -a re-init unless explicitly provided by the user.)doc"; +a re-init unless explicitly provided by the user. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_preEval = R"doc(Pre eval is called before the Sequence has called eval and submitted @@ -307,12 +321,16 @@ the commands to the GPU for processing, and can be used to perform any per-eval setup steps required as the computation iteration begins. 
It's worth noting that there are situations where eval can be called multiple times, so the resources that are created should be idempotent -in case it's called multiple times in a row.)doc"; +in case it's called multiple times in a row. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpBase_record = R"doc(The record function is intended to only send a record command or run commands that are expected to record operations that are to be -submitted as a batch into the GPU.)doc"; +submitted as a batch into the GPU. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpMult = R"doc(Operation that performs multiplication on two tensors and outpus on @@ -323,12 +341,9 @@ R"doc(Default constructor with parameters that provides the bare minimum requirements for the operations to be able to create and manage their sub-components. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that are to be used in this operation @param -komputeWorkgroup Optional parameter to specify the layout for -processing)doc"; +algorithm An algorithm that will be overridden with the OpMult shader +data and the tensors provided which are expected to be 3)doc"; static const char *__doc_kp_OpTensorCopy = R"doc(Operation that copies the data from the first tensor to the rest of @@ -340,84 +355,95 @@ static const char *__doc_kp_OpTensorCopy_OpTensorCopy = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. 
-@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorCopy_postEval = R"doc(Copies the local vectors for all the tensors to sync the data with the -gpu.)doc"; +gpu. -static const char *__doc_kp_OpTensorCopy_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorCopy_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorCopy_record = R"doc(Records the copy commands from the first tensor into all the other -tensors provided. Also optionally records a barrier.)doc"; +tensors provided. Also optionally records a barrier. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice = R"doc(Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncDevice_OpTensorSyncDevice = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. 
The tensos provided cannot be of type TensorTypes::eStorage. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc"; -static const char *__doc_kp_OpTensorSyncDevice_postEval = R"doc(Does not perform any postEval commands.)doc"; +static const char *__doc_kp_OpTensorSyncDevice_postEval = +R"doc(Does not perform any postEval commands. -static const char *__doc_kp_OpTensorSyncDevice_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncDevice_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncDevice_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its staging to device memory.)doc"; +the data from its staging to device memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal = R"doc(Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For -TensorTypes::eStaging it will only map the data into host memory which +TensorTypes::eHost it will only map the data into host memory which will happen during preEval before the recorded commands are -dispatched. 
This operation won't have any effect on -TensorTypes::eStaging.)doc"; +dispatched.)doc"; static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal = R"doc(Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensors provided cannot be of type TensorTypes::eStorage. -@param physicalDevice Vulkan physical device used to find device -queues @param device Vulkan logical device for passing to Algorithm -@param commandBuffer Vulkan Command Buffer to record commands into @param tensors Tensors that will be used to create in operation.)doc"; static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc"; static const char *__doc_kp_OpTensorSyncLocal_postEval = R"doc(For host tensors it performs the map command from the host memory into -local memory.)doc"; +local memory. -static const char *__doc_kp_OpTensorSyncLocal_preEval = R"doc(Does not perform any preEval commands.)doc"; +@param commandBuffer The command buffer to record the command into.)doc"; + +static const char *__doc_kp_OpTensorSyncLocal_preEval = +R"doc(Does not perform any preEval commands. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_OpTensorSyncLocal_record = R"doc(For device tensors, it records the copy command for the tensor to copy -the data from its device to staging memory.)doc"; +the data from its device to staging memory. + +@param commandBuffer The command buffer to record the command into.)doc"; static const char *__doc_kp_Sequence = R"doc(Container of operations that can be sent to GPU as batch)doc"; @@ -427,7 +453,8 @@ generate all dependent resources. 
@param physicalDevice Vulkan physical device @param device Vulkan logical device @param computeQueue Vulkan compute queue @param -queueIndex Vulkan compute queue index in device)doc"; +queueIndex Vulkan compute queue index in device @param totalTimestamps +Maximum number of timestamps to allocate)doc"; static const char *__doc_kp_Sequence_begin = R"doc(Begins recording commands for commands to be submitted into the @@ -443,6 +470,8 @@ static const char *__doc_kp_Sequence_createCommandBuffer = R"doc()doc"; static const char *__doc_kp_Sequence_createCommandPool = R"doc()doc"; +static const char *__doc_kp_Sequence_createTimestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Sequence_destroy = R"doc(Destroys and frees the GPU resources which include the buffer and memory and sets the sequence as init=False.)doc"; @@ -528,6 +557,10 @@ finishes, it runs the postEval of all operations. @param waitFor Number of milliseconds to wait before timing out. @return shared_ptr of the Sequence class itself)doc"; +static const char *__doc_kp_Sequence_getTimestamps = +R"doc(Return the timestamps that were latched at the beginning and after +each operation during the last eval() call.)doc"; + static const char *__doc_kp_Sequence_isInit = R"doc(Returns true if the sequence has been initialised, and it's based on the GPU resources being refrenced. @@ -607,9 +640,11 @@ R"doc(Clears command buffer and triggers re-record of all the current operations saved, which is useful if the underlying kp::Tensors or kp::Algorithms are modified and need to be re-recorded.)doc"; +static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc"; + static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc"; -static const char *__doc_kp_Shader_compile_source = +static const char *__doc_kp_Shader_compileSource = R"doc(Compile a single glslang source from string value. 
Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -622,7 +657,7 @@ List of pairs containing key value definitions @param resourcesLimit A list that contains the resource limits for the GLSL compiler @return The compiled SPIR-V binary in unsigned int32 format)doc"; -static const char *__doc_kp_Shader_compile_sources = +static const char *__doc_kp_Shader_compileSources = R"doc(Compile multiple sources with optional filenames. Currently this function uses the glslang C++ interface which is not thread safe so this funciton should not be called from multiple threads concurrently. @@ -645,11 +680,13 @@ buffer, which would be used to store their respective data. The tensors can be used for GPU data storage or transfer.)doc"; static const char *__doc_kp_Tensor_Tensor = -R"doc(Default constructor with data provided which would be used to create -the respective vulkan buffer and memory. +R"doc(Constructor with data provided which would be used to create the +respective vulkan buffer and memory. +@param physicalDevice The physical device to use to fetch properties +@param device The device to use to create the buffer and memory from @param data Non-zero-sized vector of data that will be used by the -tensor @param tensorType Type for the tensor which is of type +tensor @param tensorTypes Type for the tensor which is of type TensorTypes)doc"; static const char *__doc_kp_Tensor_TensorTypes = @@ -697,7 +734,11 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc"; static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc"; -static const char *__doc_kp_Tensor_isInit = R"doc()doc"; +static const char *__doc_kp_Tensor_isInit = +R"doc(Check whether tensor is initialized based on the created gpu +resources. 
+ +@returns Boolean stating whether tensor is initialized)doc"; static const char *__doc_kp_Tensor_mData = R"doc()doc"; @@ -742,11 +783,11 @@ vector's. Returns the element in the position requested.)doc"; static const char *__doc_kp_Tensor_rebuild = -R"doc(Initialiser which calls the initialisation for all the respective -tensors as well as creates the respective staging tensors. The staging -tensors would only be created for the tensors of type -TensorType::eDevice as otherwise there is no need to copy from host -memory.)doc"; +R"doc(Function to trigger reinitialisation of the tensor buffer and memory +with new data as well as new potential device type. + +@param data Vector of data to use to initialise vector from @param +tensorType The type to use for the tensor)doc"; static const char *__doc_kp_Tensor_recordBufferMemoryBarrier = R"doc(Records the buffer memory barrier into the command buffer which diff --git a/python/src/main.cpp b/python/src/main.cpp index 7165d41e7..d4b0f2084 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -26,9 +26,9 @@ PYBIND11_MODULE(kp, m) { py::module_ np = py::module_::import("numpy"); py::enum_(m, "TensorTypes") - .value("device", kp::Tensor::TensorTypes::eDevice, "Tensor holding data in GPU memory.") - .value("host", kp::Tensor::TensorTypes::eHost, "Tensor used for CPU visible GPU data.") - .value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.") + .value("device", kp::Tensor::TensorTypes::eDevice, DOC(kp, Tensor, TensorTypes, eDevice)) + .value("host", kp::Tensor::TensorTypes::eHost, DOC(kp, Tensor, TensorTypes, eHost)) + .value("storage", kp::Tensor::TensorTypes::eStorage, DOC(kp, Tensor, TensorTypes, eStorage)) .export_values(); #if !defined(KOMPUTE_DISABLE_SHADER_UTILS) || !KOMPUTE_DISABLE_SHADER_UTILS @@ -37,51 +37,63 @@ PYBIND11_MODULE(kp, m) { const std::string& source, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = 
kp::Shader::compile_source(source, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSource(source, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles string source provided and returns the value in bytes", - py::arg("source"), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ) + DOC(kp, Shader, compileSource), + py::arg("source"), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ) .def_static("compile_sources", []( const std::vector& source, const std::vector& files, const std::string& entryPoint, const std::vector>& definitions) { - std::vector spirv = kp::Shader::compile_sources(source, files, entryPoint, definitions); + std::vector spirv = kp::Shader::compileSources(source, files, entryPoint, definitions); return py::bytes((const char*)spirv.data(), spirv.size() * sizeof(uint32_t)); }, - "Compiles sources provided with file names and returns the value in bytes", - py::arg("sources"), py::arg("files") = std::vector(), py::arg("entryPoint") = "main", py::arg("definitions") = std::vector>() ); + DOC(kp, Shader, compileSources), + py::arg("sources"), + py::arg("files") = std::vector(), + py::arg("entryPoint") = "main", + py::arg("definitions") = std::vector>() ); #endif // KOMPUTE_DISABLE_SHADER_UTILS - py::class_>(m, "OpBase"); + py::class_>(m, "OpBase", DOC(kp, OpBase)); - py::class_>(m, "OpTensorSyncDevice", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncDevice", py::base(), DOC(kp, OpTensorSyncDevice)) + .def(py::init>&>(), DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice)); - py::class_>(m, "OpTensorSyncLocal", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorSyncLocal", py::base(), DOC(kp, OpTensorSyncLocal)) + .def(py::init>&>(), DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal)); - py::class_>(m, "OpTensorCopy", py::base()) - .def(py::init>&>()); + py::class_>( + m, "OpTensorCopy", py::base(), 
DOC(kp, OpTensorCopy)) + .def(py::init>&>(), DOC(kp, OpTensorCopy, OpTensorCopy)); - py::class_>(m, "OpAlgoDispatch", py::base()) + py::class_>( + m, "OpAlgoDispatch", py::base(), DOC(kp, OpAlgoDispatch)) .def(py::init&,const kp::Constants&>(), + DOC(kp, OpAlgoDispatch, OpAlgoDispatch), py::arg("algorithm"), py::arg("push_consts") = kp::Constants()); - py::class_>(m, "OpMult", py::base()) - .def(py::init>&,const std::shared_ptr&>()); + py::class_>( + m, "OpMult", py::base(), DOC(kp, OpMult)) + .def(py::init>&,const std::shared_ptr&>(), + DOC(kp, OpMult, OpMult)); - py::class_>(m, "Algorithm") - .def("get_tensors", &kp::Algorithm::getTensors) - .def("destroy", &kp::Algorithm::destroy) - .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants) - .def("is_init", &kp::Algorithm::isInit); + py::class_>(m, "Algorithm", DOC(kp, Algorithm, Algorithm)) + .def("get_tensors", &kp::Algorithm::getTensors, DOC(kp, Algorithm, getTensors)) + .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) + .def("get_spec_consts", &kp::Algorithm::getSpecializationConstants, DOC(kp, Algorithm, getSpecializationConstants)) + .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); py::class_>(m, "Tensor", DOC(kp, Tensor)) .def("data", [](kp::Tensor& self) { return py::array(self.data().size(), self.data().data()); - }, "Returns stored data as a new numpy array.") + }, DOC(kp, Tensor, data)) .def("__getitem__", [](kp::Tensor &self, size_t index) -> float { return self.data()[index]; }, "When only an index is necessary") .def("__setitem__", [](kp::Tensor &self, size_t index, float value) { @@ -91,7 +103,7 @@ PYBIND11_MODULE(kp, m) { const py::buffer_info info = flatdata.request(); const float* ptr = (float*) info.ptr; self.setData(std::vector(ptr, ptr+flatdata.size())); - }, "Overrides the data in the local Tensor memory.") + }, DOC(kp, Tensor, setData)) .def("__iter__", [](kp::Tensor &self) { return py::make_iterator(self.data().begin(), 
self.data().end()); }, py::keep_alive<0, 1>(), // Required to keep alive iterator while exists @@ -112,35 +124,52 @@ PYBIND11_MODULE(kp, m) { } return reversed; }) - .def("size", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("__len__", &kp::Tensor::size, "Retrieves the size of the Tensor data as per the local Tensor memory.") - .def("tensor_type", &kp::Tensor::tensorType, "Retreves the memory type of the tensor.") - .def("is_init", &kp::Tensor::isInit, "Checks whether the tensor GPU memory has been initialised.") - .def("destroy", &kp::Tensor::destroy, "Destroy tensor GPU resources."); + .def("size", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size)) + .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType)) + .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit)) + .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy)); - py::class_>(m, "Sequence") - .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }) - .def("eval", [](kp::Sequence& self) { return self.eval(); }) - .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }) - .def("eval_async", [](kp::Sequence& self) { return self.eval(); }) - .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }) - .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }) - .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }) - .def("is_recording", &kp::Sequence::isRecording) - .def("is_running", &kp::Sequence::isRunning) - .def("is_init", &kp::Sequence::isInit) - .def("get_timestamps", &kp::Sequence::getTimestamps) - .def("clear", &kp::Sequence::clear) - .def("destroy", &kp::Sequence::destroy); + py::class_>(m, "Sequence", DOC(kp, Sequence)) + .def("record", [](kp::Sequence& self, std::shared_ptr op) { return self.record(op); }, + DOC(kp, Sequence, 
record)) + .def("eval", [](kp::Sequence& self) { return self.eval(); }, + DOC(kp, Sequence, eval)) + .def("eval", [](kp::Sequence& self, std::shared_ptr op) { return self.eval(op); }, + DOC(kp, Sequence, eval)) + .def("eval_async", [](kp::Sequence& self) { return self.evalAsync(); }, + DOC(kp, Sequence, evalAsync)) + .def("eval_async", [](kp::Sequence& self, std::shared_ptr op) { return self.evalAsync(op); }, + DOC(kp, Sequence, evalAsync)) + .def("eval_await", [](kp::Sequence& self) { return self.evalAwait(); }, + DOC(kp, Sequence, evalAwait)) + .def("eval_await", [](kp::Sequence& self, uint32_t wait) { return self.evalAwait(wait); }, + DOC(kp, Sequence, evalAwait)) + .def("is_recording", &kp::Sequence::isRecording, + DOC(kp, Sequence, isRecording)) + .def("is_running", &kp::Sequence::isRunning, + DOC(kp, Sequence, isRunning)) + .def("is_init", &kp::Sequence::isInit, + DOC(kp, Sequence, isInit)) + .def("clear", &kp::Sequence::clear, + DOC(kp, Sequence, clear)) + .def("rerecord", &kp::Sequence::rerecord, + DOC(kp, Sequence, rerecord)) + .def("get_timestamps", &kp::Sequence::getTimestamps, + DOC(kp, Sequence, getTimestamps)) + .def("destroy", &kp::Sequence::destroy, + DOC(kp, Sequence, destroy)); - py::class_>(m, "Manager") - .def(py::init()) - .def(py::init()) + py::class_>(m, "Manager", DOC(kp, Manager)) + .def(py::init(), DOC(kp, Manager, Manager)) + .def(py::init(), DOC(kp, Manager, Manager_2)) .def(py::init&,const std::vector&>(), + DOC(kp, Manager, Manager_2), py::arg("device") = 0, py::arg("family_queue_indices") = std::vector(), py::arg("desired_extensions") = std::vector()) - .def("sequence", &kp::Manager::sequence, py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) + .def("sequence", &kp::Manager::sequence, DOC(kp, Manager, sequence), + py::arg("queue_index") = 0, py::arg("total_timestamps") = 0) .def("tensor", [np](kp::Manager& self, const py::array_t data, kp::Tensor::TensorTypes tensor_type) { @@ -149,7 +178,7 @@ PYBIND11_MODULE(kp, m) { const
float* ptr = (float*) info.ptr; return self.tensor(std::vector(ptr, ptr+flatdata.size()), tensor_type); }, - "Tensor initialisation function with data and tensor type", + DOC(kp, Manager, tensor), py::arg("data"), py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) .def("algorithm", [](kp::Manager& self, const std::vector>& tensors, @@ -163,8 +192,12 @@ PYBIND11_MODULE(kp, m) { std::vector spirvVec((uint32_t*)data, (uint32_t*)(data + length)); return self.algorithm(tensors, spirvVec, workgroup, spec_consts, push_consts); }, - "Algorithm initialisation function", - py::arg("tensors"), py::arg("spirv"), py::arg("workgroup") = kp::Workgroup(), py::arg("spec_consts") = kp::Constants(), py::arg("push_consts") = kp::Constants()); + DOC(kp, Manager, algorithm), + py::arg("tensors"), + py::arg("spirv"), + py::arg("workgroup") = kp::Workgroup(), + py::arg("spec_consts") = kp::Constants(), + py::arg("push_consts") = kp::Constants()); #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO;