Merge pull request #286 from COM8/clang-format

Added .clang-format file and formatted everything
This commit is contained in:
Alejandro Saucedo 2022-05-04 07:42:30 +01:00 committed by GitHub
commit de46d30678
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 1198 additions and 1051 deletions

5
.clang-format Normal file
View file

@ -0,0 +1,5 @@
---
BasedOnStyle: Mozilla
IndentWidth: 4
...

View file

@ -198,7 +198,9 @@ win_build_xxd:
cd external/bin/ && gcc.exe -o xxd.exe xxd.c -DCYGWIN
format:
$(CLANG_FORMAT_BIN) -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp test/*cpp
# Run clang-format in place (-style=file, i.e. driven by the .clang-format file)
# on every .h/.c/.hpp/.cpp file under the listed directories, skipping any path
# that contains "shaders". The name patterns are quoted so the shell hands them
# to find unexpanded instead of globbing them against the current directory.
find examples single_include src test -depth \( -iname '*.h' -or -iname '*.c' -or -iname '*.hpp' -or -iname '*.cpp' \) \
	| grep -v "shaders" | xargs $(CLANG_FORMAT_BIN) -style=file -i
static_scan:
cppcheck --project=build/compile_commands.json -iexternal/

View file

@ -12,17 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Includes the Jni utilities for Android to be able to create the
// relevant bindings for java, including JNIEXPORT, JNICALL , and
// Includes the Jni utilities for Android to be able to create the
// relevant bindings for java, including JNIEXPORT, JNICALL , and
// other "j-variables".
#include <jni.h>
// The ML class exposing the Kompute ML workflow for training and
// The ML class exposing the Kompute ML workflow for training and
// prediction of inference data.
#include "KomputeModelML.hpp"
// Allows us to use the C++ sleep function to wait when loading the
// Allows us to use the C++ sleep function to wait when loading the
// Vulkan library in android
#include <unistd.h>
@ -30,86 +29,92 @@
#define KOMPUTE_VK_INIT_RETRIES 5
#endif
static std::vector<float> jfloatArrayToVector(JNIEnv *env, const jfloatArray & fromArray) {
float *inCArray = env->GetFloatArrayElements(fromArray, NULL);
if (NULL == inCArray) return std::vector<float>();
// Copies the contents of a Java float array into a std::vector<float>.
//
// env:       JNI environment used to access the array.
// fromArray: Java float array to read from.
//
// Returns a vector holding a copy of the array elements, or an empty vector
// when GetFloatArrayElements returns NULL.
static std::vector<float>
jfloatArrayToVector(JNIEnv* env, const jfloatArray& fromArray)
{
    float* inCArray = env->GetFloatArrayElements(fromArray, NULL);
    if (NULL == inCArray)
        return std::vector<float>();
    const int32_t length = env->GetArrayLength(fromArray);
    std::vector<float> outVector(inCArray, inCArray + length);
    // Every successful GetFloatArrayElements must be paired with a release,
    // otherwise the buffer handed out by the JVM is never given back.
    // JNI_ABORT: the elements were only read, so nothing needs copying back.
    env->ReleaseFloatArrayElements(fromArray, inCArray, JNI_ABORT);
    return outVector;
}
static jfloatArray vectorToJFloatArray(JNIEnv *env, const std::vector<float> & fromVector) {
static jfloatArray
vectorToJFloatArray(JNIEnv* env, const std::vector<float>& fromVector)
{
jfloatArray ret = env->NewFloatArray(fromVector.size());
if (NULL == ret) return NULL;
if (NULL == ret)
return NULL;
env->SetFloatArrayRegion(ret, 0, fromVector.size(), fromVector.data());
return ret;
}
extern "C" {
extern "C"
{
JNIEXPORT jboolean JNICALL
Java_com_ethicalml_kompute_KomputeJni_initVulkan(JNIEnv *env, jobject thiz) {
JNIEXPORT jboolean JNICALL
Java_com_ethicalml_kompute_KomputeJni_initVulkan(JNIEnv* env, jobject thiz)
{
KP_LOG_INFO("Initialising vulkan");
KP_LOG_INFO("Initialising vulkan");
uint32_t totalRetries = 0;
uint32_t totalRetries = 0;
while (totalRetries < KOMPUTE_VK_INIT_RETRIES) {
KP_LOG_INFO("VULKAN LOAD TRY NUMBER: %u", totalRetries);
if(InitVulkan()) {
break;
while (totalRetries < KOMPUTE_VK_INIT_RETRIES) {
KP_LOG_INFO("VULKAN LOAD TRY NUMBER: %u", totalRetries);
if (InitVulkan()) {
break;
}
sleep(1);
totalRetries++;
}
sleep(1);
totalRetries++;
return totalRetries < KOMPUTE_VK_INIT_RETRIES;
}
return totalRetries < KOMPUTE_VK_INIT_RETRIES;
}
JNIEXPORT jfloatArray JNICALL
Java_com_ethicalml_kompute_KomputeJni_kompute(
JNIEnv *env,
jobject thiz,
jfloatArray xiJFloatArr,
jfloatArray xjJFloatArr,
jfloatArray yJFloatArr) {
KP_LOG_INFO("Creating manager");
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
KomputeModelML kml;
kml.train(yVector, xiVector, xjVector);
std::vector<float> pred = kml.predict(xiVector, xjVector);
return vectorToJFloatArray(env, pred);
}
JNIEXPORT jfloatArray JNICALL
Java_com_ethicalml_kompute_KomputeJni_komputeParams(
JNIEnv *env,
jobject thiz,
jfloatArray xiJFloatArr,
jfloatArray xjJFloatArr,
jfloatArray yJFloatArr) {
KP_LOG_INFO("Creating manager");
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
KomputeModelML kml;
kml.train(yVector, xiVector, xjVector);
std::vector<float> params = kml.get_params();
return vectorToJFloatArray(env, params);
}
// JNI entry point: converts the three Java float arrays to native vectors,
// trains a KomputeModelML on them and returns the model's predictions for the
// xi/xj inputs as a new Java float array.
JNIEXPORT jfloatArray JNICALL
Java_com_ethicalml_kompute_KomputeJni_kompute(JNIEnv* env,
                                              jobject thiz,
                                              jfloatArray xiJFloatArr,
                                              jfloatArray xjJFloatArr,
                                              jfloatArray yJFloatArr)
{
    KP_LOG_INFO("Creating manager");

    // Conversion order is kept as xi, xj, y.
    std::vector<float> xiValues = jfloatArrayToVector(env, xiJFloatArr);
    std::vector<float> xjValues = jfloatArrayToVector(env, xjJFloatArr);
    std::vector<float> yValues = jfloatArrayToVector(env, yJFloatArr);

    KomputeModelML model;
    model.train(yValues, xiValues, xjValues);

    return vectorToJFloatArray(env, model.predict(xiValues, xjValues));
}
// JNI entry point: converts the three Java float arrays to native vectors,
// trains a KomputeModelML on them and returns the values reported by
// get_params() as a new Java float array.
JNIEXPORT jfloatArray JNICALL
Java_com_ethicalml_kompute_KomputeJni_komputeParams(JNIEnv* env,
                                                    jobject thiz,
                                                    jfloatArray xiJFloatArr,
                                                    jfloatArray xjJFloatArr,
                                                    jfloatArray yJFloatArr)
{
    KP_LOG_INFO("Creating manager");

    // Conversion order is kept as xi, xj, y.
    std::vector<float> xiValues = jfloatArrayToVector(env, xiJFloatArr);
    std::vector<float> xjValues = jfloatArrayToVector(env, xjJFloatArr);
    std::vector<float> yValues = jfloatArrayToVector(env, yJFloatArr);

    KomputeModelML model;
    model.train(yValues, xiValues, xjValues);

    return vectorToJFloatArray(env, model.get_params());
}
}

View file

@ -1,15 +1,15 @@
#include "KomputeModelML.hpp"
KomputeModelML::KomputeModelML() {
KomputeModelML::KomputeModelML() {}
}
KomputeModelML::~KomputeModelML() {}
KomputeModelML::~KomputeModelML() {
}
void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData, std::vector<float> xJData) {
void
KomputeModelML::train(std::vector<float> yData,
std::vector<float> xIData,
std::vector<float> xJData)
{
std::vector<float> zerosData;
@ -42,17 +42,19 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
bIn, bOut, lOut };
std::vector<uint32_t> spirv = std::vector<uint32_t>(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::
shaders_glsl_logisticregression_comp_spv +
kp::shader_data::
shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algorithm)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
@ -79,7 +81,9 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
}
}
std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ) {
std::vector<float>
KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ)
{
KP_LOG_INFO("Running prediction inference");
@ -93,9 +97,8 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
for (size_t i = 0; i < xI.size(); i++) {
float xIVal = xI[i];
float xJVal = xJ[i];
float result = (xIVal * this->mWeights[0]
+ xJVal * this->mWeights[1]
+ this->mBias[0]);
float result = (xIVal * this->mWeights[0] + xJVal * this->mWeights[1] +
this->mBias[0]);
// Instead of using sigmoid we'll just return full numbers
float var = result > 0 ? 1 : 0;
@ -107,13 +110,15 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
return retVector;
}
std::vector<float> KomputeModelML::get_params() {
std::vector<float>
KomputeModelML::get_params()
{
KP_LOG_INFO("Displaying results");
std::vector<float> retVector;
if(this->mWeights.size() + this->mBias.size() == 0) {
if (this->mWeights.size() + this->mBias.size() == 0) {
return retVector;
}

View file

@ -2,28 +2,30 @@
#ifndef KOMPUTEMODELML_HPP
#define KOMPUTEMODELML_HPP
#include <vector>
#include <string>
#include <memory>
#include <string>
#include <vector>
#include "kompute/Kompute.hpp"
class KomputeModelML {
class KomputeModelML
{
public:
public:
KomputeModelML();
virtual ~KomputeModelML();
void train(std::vector<float> yData, std::vector<float> xIData, std::vector<float> xJData);
void train(std::vector<float> yData,
std::vector<float> xIData,
std::vector<float> xJData);
std::vector<float> predict(std::vector<float> xI, std::vector<float> xJ);
std::vector<float> get_params();
private:
private:
std::vector<float> mWeights;
std::vector<float> mBias;
};
static std::string LR_SHADER = R"(
@ -83,4 +85,4 @@ void main() {
}
)";
#endif //ANDROID_SIMPLE_KOMPUTEMODELML_HPP
#endif // ANDROID_SIMPLE_KOMPUTEMODELML_HPP

44
examples/array_multiplication/src/Main.cpp Executable file → Normal file
View file

@ -5,23 +5,27 @@
#include "kompute/Kompute.hpp"
static
std::vector<uint32_t>
compileSource(
const std::string& source)
static std::vector<uint32_t>
compileSource(const std::string& source)
{
std::ofstream fileOut("tmp_kp_shader.comp");
fileOut << source;
fileOut.close();
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
fileOut << source;
fileOut.close();
if (system(
std::string(
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
.c_str()))
throw std::runtime_error("Error running glslangValidator command");
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
std::vector<char> buffer;
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
buffer.insert(
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return { (uint32_t*)buffer.data(),
(uint32_t*)(buffer.data() + buffer.size()) };
}
int main()
int
main()
{
#if KOMPUTE_ENABLE_SPDLOG
spdlog::set_level(
@ -53,21 +57,23 @@ int main()
}
)");
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA,
tensorInB,
tensorOut };
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, compileSource(shader));
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm(params, compileSource(shader));
mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
// prints "Output: { 0 4 12 }"
std::cout<< "Output: { ";
std::cout << "Output: { ";
for (const float& elem : tensorOut->vector()) {
std::cout << elem << " ";
std::cout << elem << " ";
}
std::cout << "}" << std::endl;
}

View file

@ -4,55 +4,63 @@
#include "KomputeSummatorNode.h"
static
std::vector<uint32_t>
compileSource(
const std::string& source)
static std::vector<uint32_t>
compileSource(const std::string& source)
{
std::ofstream fileOut("tmp_kp_shader.comp");
fileOut << source;
fileOut.close();
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
fileOut << source;
fileOut.close();
if (system(
std::string(
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
.c_str()))
throw std::runtime_error("Error running glslangValidator command");
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
std::vector<char> buffer;
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
buffer.insert(
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return { (uint32_t*)buffer.data(),
(uint32_t*)(buffer.data() + buffer.size()) };
}
KomputeSummatorNode::KomputeSummatorNode() {
KomputeSummatorNode::KomputeSummatorNode()
{
this->_init();
}
void KomputeSummatorNode::add(float value) {
void
KomputeSummatorNode::add(float value)
{
// Set the new data in the local device
this->mSecondaryTensor->setData({value});
this->mSecondaryTensor->setData({ value });
// Execute recorded sequence
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
sq->eval();
}
else {
} else {
throw std::runtime_error("Sequence pointer no longer available");
}
}
void KomputeSummatorNode::reset() {
}
void
KomputeSummatorNode::reset()
{}
float KomputeSummatorNode::get_total() const {
float
KomputeSummatorNode::get_total() const
{
return this->mPrimaryTensor->data()[0];
}
void KomputeSummatorNode::_init() {
void
KomputeSummatorNode::_init()
{
std::cout << "CALLING INIT" << std::endl;
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
this->mSequence = this->mManager.sequence();
// We now record the steps in the sequence
if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
{
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
std::string shader(R"(
#version 450
@ -68,40 +76,38 @@ void KomputeSummatorNode::_init() {
}
)");
std::shared_ptr<kp::Algorithm> algo =
this->mManager.algorithm(
{ this->mPrimaryTensor, this->mSecondaryTensor },
compileSource(shader));
std::shared_ptr<kp::Algorithm> algo = this->mManager.algorithm(
{ this->mPrimaryTensor, this->mSecondaryTensor },
compileSource(shader));
// First we ensure secondary tensor loads to GPU
// No need to sync the primary tensor as it should not be changed
sq->record<kp::OpTensorSyncDevice>(
{ this->mSecondaryTensor });
sq->record<kp::OpTensorSyncDevice>({ this->mSecondaryTensor });
// Then we run the operation with both tensors
sq->record<kp::OpAlgoDispatch>(algo);
// We map the result back to local
sq->record<kp::OpTensorSyncLocal>(
{ this->mPrimaryTensor });
// We map the result back to local
sq->record<kp::OpTensorSyncLocal>({ this->mPrimaryTensor });
}
else {
} else {
throw std::runtime_error("Sequence pointer no longer available");
}
}
void KomputeSummatorNode::_process(float delta) {
void
KomputeSummatorNode::_process(float delta)
{}
}
void KomputeSummatorNode::_bind_methods() {
ClassDB::bind_method(D_METHOD("_process", "delta"), &KomputeSummatorNode::_process);
void
KomputeSummatorNode::_bind_methods()
{
ClassDB::bind_method(D_METHOD("_process", "delta"),
&KomputeSummatorNode::_process);
ClassDB::bind_method(D_METHOD("_init"), &KomputeSummatorNode::_init);
ClassDB::bind_method(D_METHOD("add", "value"), &KomputeSummatorNode::add);
ClassDB::bind_method(D_METHOD("reset"), &KomputeSummatorNode::reset);
ClassDB::bind_method(D_METHOD("get_total"), &KomputeSummatorNode::get_total);
ClassDB::bind_method(D_METHOD("get_total"),
&KomputeSummatorNode::get_total);
}

View file

@ -6,10 +6,11 @@
#include "scene/main/node.h"
class KomputeSummatorNode : public Node {
class KomputeSummatorNode : public Node
{
GDCLASS(KomputeSummatorNode, Node);
public:
public:
KomputeSummatorNode();
void add(float value);
@ -19,13 +20,12 @@ public:
void _process(float delta);
void _init();
protected:
protected:
static void _bind_methods();
private:
private:
kp::Manager mManager;
std::shared_ptr<kp::Sequence> mSequence;
std::shared_ptr<kp::Tensor> mPrimaryTensor;
std::shared_ptr<kp::Tensor> mSecondaryTensor;
};

View file

@ -2,13 +2,17 @@
#include "register_types.h"
#include "core/class_db.h"
#include "KomputeSummatorNode.h"
#include "core/class_db.h"
void register_kompute_summator_types() {
void
register_kompute_summator_types()
{
ClassDB::register_class<KomputeSummatorNode>();
}
void unregister_kompute_summator_types() {
// Nothing to do here in this example.
void
unregister_kompute_summator_types()
{
// Nothing to do here in this example.
}

View file

@ -1,6 +1,8 @@
/* register_types.h */
#pragma once
void register_kompute_summator_types();
void unregister_kompute_summator_types();
void
register_kompute_summator_types();
void
unregister_kompute_summator_types();
/* yes, the word in the middle must be the same as the module folder name */

View file

@ -1,14 +1,20 @@
#include "KomputeSummator.hpp"
extern "C" void GDN_EXPORT godot_gdnative_init(godot_gdnative_init_options *o) {
extern "C" void GDN_EXPORT
godot_gdnative_init(godot_gdnative_init_options* o)
{
godot::Godot::gdnative_init(o);
}
extern "C" void GDN_EXPORT godot_gdnative_terminate(godot_gdnative_terminate_options *o) {
extern "C" void GDN_EXPORT
godot_gdnative_terminate(godot_gdnative_terminate_options* o)
{
godot::Godot::gdnative_terminate(o);
}
extern "C" void GDN_EXPORT godot_nativescript_init(void *handle) {
extern "C" void GDN_EXPORT
godot_nativescript_init(void* handle)
{
godot::Godot::nativescript_init(handle);
godot::register_class<godot::KomputeSummator>();

View file

@ -1,49 +1,59 @@
/* summator.cpp */
#include <vector>
#include <iostream>
#include <vector>
#include "KomputeSummator.hpp"
static
std::vector<uint32_t>
compileSource(
const std::string& source)
static std::vector<uint32_t>
compileSource(const std::string& source)
{
std::ofstream fileOut("tmp_kp_shader.comp");
fileOut << source;
fileOut.close();
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
fileOut << source;
fileOut.close();
if (system(
std::string(
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
.c_str()))
throw std::runtime_error("Error running glslangValidator command");
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
std::vector<char> buffer;
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
buffer.insert(
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
return { (uint32_t*)buffer.data(),
(uint32_t*)(buffer.data() + buffer.size()) };
}
namespace godot {
KomputeSummator::KomputeSummator() {
KomputeSummator::KomputeSummator()
{
std::cout << "CALLING CONSTRUCTOR" << std::endl;
this->_init();
}
void KomputeSummator::add(float value) {
void
KomputeSummator::add(float value)
{
// Set the new data in the local device
this->mSecondaryTensor->setData({value});
this->mSecondaryTensor->setData({ value });
// Execute recorded sequence
this->mSequence->eval();
}
void KomputeSummator::reset() {
}
void
KomputeSummator::reset()
{}
float KomputeSummator::get_total() const {
float
KomputeSummator::get_total() const
{
return this->mPrimaryTensor->data()[0];
}
void KomputeSummator::_init() {
void
KomputeSummator::_init()
{
std::cout << "CALLING INIT" << std::endl;
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
@ -70,33 +80,34 @@ void KomputeSummator::_init() {
// First we ensure secondary tensor loads to GPU
// No need to sync the primary tensor as it should not be changed
this->mSequence->record<kp::OpTensorSyncDevice>(
{ this->mSecondaryTensor });
{ this->mSecondaryTensor });
// Then we run the operation with both tensors
this->mSequence->record<kp::OpAlgoCreate>(
{ this->mPrimaryTensor, this->mSecondaryTensor },
compileSource(shader));
{ this->mPrimaryTensor, this->mSecondaryTensor },
compileSource(shader));
// We map the result back to local
// We map the result back to local
this->mSequence->record<kp::OpTensorSyncLocal>(
{ this->mPrimaryTensor });
{ this->mPrimaryTensor });
this->mSequence->end();
}
}
void KomputeSummator::_process(float delta) {
void
KomputeSummator::_process(float delta)
{}
}
void
KomputeSummator::_register_methods()
{
register_method((char*)"_process", &KomputeSummator::_process);
register_method((char*)"_init", &KomputeSummator::_init);
void KomputeSummator::_register_methods() {
register_method((char *)"_process", &KomputeSummator::_process);
register_method((char *)"_init", &KomputeSummator::_init);
register_method((char *)"add", &KomputeSummator::add);
register_method((char *)"reset", &KomputeSummator::reset);
register_method((char *)"get_total", &KomputeSummator::get_total);
register_method((char*)"add", &KomputeSummator::add);
register_method((char*)"reset", &KomputeSummator::reset);
register_method((char*)"get_total", &KomputeSummator::get_total);
}
}

View file

@ -8,11 +8,12 @@
#include "kompute/Kompute.hpp"
namespace godot {
class KomputeSummator : public Node2D {
private:
class KomputeSummator : public Node2D
{
private:
GODOT_CLASS(KomputeSummator, Node2D);
public:
public:
KomputeSummator();
void add(float value);
@ -24,7 +25,7 @@ public:
static void _register_methods();
private:
private:
kp::Manager mManager;
std::shared_ptr<kp::Sequence> mSequence;
std::shared_ptr<kp::Tensor> mPrimaryTensor;

View file

@ -4,12 +4,15 @@
#include "KomputeModelMLNode.h"
KomputeModelMLNode::KomputeModelMLNode() {
KomputeModelMLNode::KomputeModelMLNode()
{
std::cout << "CALLING CONSTRUCTOR" << std::endl;
this->_init();
}
void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
void
KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr)
{
assert(yArr.size() == xIArr.size());
assert(xIArr.size() == xJArr.size());
@ -52,15 +55,19 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
{
std::vector<uint32_t> spirv(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::
shaders_glsl_logisticregression_comp_spv +
kp::shader_data::
shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
@ -88,20 +95,22 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
}
}
Array KomputeModelMLNode::predict(Array xI, Array xJ) {
Array
KomputeModelMLNode::predict(Array xI, Array xJ)
{
assert(xI.size() == xJ.size());
Array retArray;
// We run the inference in the CPU for simplicity
// But you can also implement the inference on GPU
// But you can also implement the inference on GPU
// GPU implementation would speed up minibatching
for (size_t i = 0; i < xI.size(); i++) {
float xIVal = xI[i];
float xJVal = xJ[i];
float result = (xIVal * this->mWeights.data()[0]
+ xJVal * this->mWeights.data()[1]
+ this->mBias.data()[0]);
float result =
(xIVal * this->mWeights.data()[0] + xJVal * this->mWeights.data()[1] +
this->mBias.data()[0]);
// Instead of using sigmoid we'll just return full numbers
Variant var = result > 0 ? 1 : 0;
@ -111,12 +120,14 @@ Array KomputeModelMLNode::predict(Array xI, Array xJ) {
return retArray;
}
Array KomputeModelMLNode::get_params() {
Array
KomputeModelMLNode::get_params()
{
Array retArray;
KP_LOG_INFO(this->mWeights.size() + this->mBias.size());
if(this->mWeights.size() + this->mBias.size() == 0) {
if (this->mWeights.size() + this->mBias.size() == 0) {
return retArray;
}
@ -128,20 +139,27 @@ Array KomputeModelMLNode::get_params() {
return retArray;
}
void KomputeModelMLNode::_init() {
void
KomputeModelMLNode::_init()
{
std::cout << "CALLING INIT" << std::endl;
}
void KomputeModelMLNode::_process(float delta) {
void
KomputeModelMLNode::_process(float delta)
{}
}
void KomputeModelMLNode::_bind_methods() {
ClassDB::bind_method(D_METHOD("_process", "delta"), &KomputeModelMLNode::_process);
void
KomputeModelMLNode::_bind_methods()
{
ClassDB::bind_method(D_METHOD("_process", "delta"),
&KomputeModelMLNode::_process);
ClassDB::bind_method(D_METHOD("_init"), &KomputeModelMLNode::_init);
ClassDB::bind_method(D_METHOD("train", "yArr", "xIArr", "xJArr"), &KomputeModelMLNode::train);
ClassDB::bind_method(D_METHOD("predict", "xI", "xJ"), &KomputeModelMLNode::predict);
ClassDB::bind_method(D_METHOD("get_params"), &KomputeModelMLNode::get_params);
ClassDB::bind_method(D_METHOD("train", "yArr", "xIArr", "xJArr"),
&KomputeModelMLNode::train);
ClassDB::bind_method(D_METHOD("predict", "xI", "xJ"),
&KomputeModelMLNode::predict);
ClassDB::bind_method(D_METHOD("get_params"),
&KomputeModelMLNode::get_params);
}

View file

@ -6,10 +6,11 @@
#include "scene/main/node.h"
class KomputeModelMLNode : public Node {
class KomputeModelMLNode : public Node
{
GDCLASS(KomputeModelMLNode, Node);
public:
public:
KomputeModelMLNode();
void train(Array y, Array xI, Array xJ);
@ -21,10 +22,10 @@ public:
void _process(float delta);
void _init();
protected:
protected:
static void _bind_methods();
private:
private:
kp::Tensor mWeights;
kp::Tensor mBias;
};
@ -85,4 +86,3 @@ void main() {
lout[idx] = calculateLoss(yHat, yCurr);
}
)";

View file

@ -2,13 +2,17 @@
#include "register_types.h"
#include "core/class_db.h"
#include "KomputeModelMLNode.h"
#include "core/class_db.h"
void register_kompute_model_ml_types() {
void
register_kompute_model_ml_types()
{
ClassDB::register_class<KomputeModelMLNode>();
}
void unregister_kompute_model_ml_types() {
// Nothing to do here in this example.
void
unregister_kompute_model_ml_types()
{
// Nothing to do here in this example.
}

View file

@ -1,6 +1,8 @@
/* register_types.h */
#pragma once
void register_kompute_model_ml_types();
void unregister_kompute_model_ml_types();
void
register_kompute_model_ml_types();
void
unregister_kompute_model_ml_types();
/* yes, the word in the middle must be the same as the module folder name */

View file

@ -1,14 +1,20 @@
#include "KomputeModelML.hpp"
extern "C" void GDN_EXPORT godot_gdnative_init(godot_gdnative_init_options *o) {
extern "C" void GDN_EXPORT
godot_gdnative_init(godot_gdnative_init_options* o)
{
godot::Godot::gdnative_init(o);
}
extern "C" void GDN_EXPORT godot_gdnative_terminate(godot_gdnative_terminate_options *o) {
extern "C" void GDN_EXPORT
godot_gdnative_terminate(godot_gdnative_terminate_options* o)
{
godot::Godot::gdnative_terminate(o);
}
extern "C" void GDN_EXPORT godot_nativescript_init(void *handle) {
extern "C" void GDN_EXPORT
godot_nativescript_init(void* handle)
{
godot::Godot::nativescript_init(handle);
godot::register_class<godot::KomputeModelML>();

View file

@ -1,19 +1,22 @@
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <string>
#include <vector>
#include "KomputeModelML.hpp"
namespace godot {
KomputeModelML::KomputeModelML() {
KomputeModelML::KomputeModelML()
{
std::cout << "CALLING CONSTRUCTOR" << std::endl;
this->_init();
}
void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
void
KomputeModelML::train(Array yArr, Array xIArr, Array xJArr)
{
assert(yArr.size() == xIArr.size());
assert(xIArr.size() == xJArr.size());
@ -56,15 +59,19 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
{
std::vector<uint32_t> spirv(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::
shaders_glsl_logisticregression_comp_spv +
kp::shader_data::
shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
@ -92,20 +99,22 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
}
}
Array KomputeModelML::predict(Array xI, Array xJ) {
Array
KomputeModelML::predict(Array xI, Array xJ)
{
assert(xI.size() == xJ.size());
Array retArray;
// We run the inference in the CPU for simplicity
// But you can also implement the inference on GPU
// But you can also implement the inference on GPU
// GPU implementation would speed up minibatching
for (size_t i = 0; i < xI.size(); i++) {
float xIVal = xI[i];
float xJVal = xJ[i];
float result = (xIVal * this->mWeights->data()[0]
+ xJVal * this->mWeights->data()[1]
+ this->mBias->data()[0]);
float result =
(xIVal * this->mWeights->data()[0] +
xJVal * this->mWeights->data()[1] + this->mBias->data()[0]);
// Instead of using sigmoid we'll just return full numbers
Variant var = result > 0 ? 1 : 0;
@ -115,12 +124,14 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
return retArray;
}
Array KomputeModelML::get_params() {
Array
KomputeModelML::get_params()
{
Array retArray;
KP_LOG_INFO(this->mWeights->size() + this->mBias->size());
if(this->mWeights->size() + this->mBias->size() == 0) {
if (this->mWeights->size() + this->mBias->size() == 0) {
return retArray;
}
@ -132,22 +143,25 @@ Array KomputeModelML::get_params() {
return retArray;
}
void KomputeModelML::_init() {
void
KomputeModelML::_init()
{
std::cout << "CALLING INIT" << std::endl;
}
void KomputeModelML::_process(float delta) {
void
KomputeModelML::_process(float delta)
{}
}
void
KomputeModelML::_register_methods()
{
register_method((char*)"_process", &KomputeModelML::_process);
register_method((char*)"_init", &KomputeModelML::_init);
void KomputeModelML::_register_methods() {
register_method((char *)"_process", &KomputeModelML::_process);
register_method((char *)"_init", &KomputeModelML::_init);
register_method((char *)"train", &KomputeModelML::train);
register_method((char *)"predict", &KomputeModelML::predict);
register_method((char *)"get_params", &KomputeModelML::get_params);
register_method((char*)"train", &KomputeModelML::train);
register_method((char*)"predict", &KomputeModelML::predict);
register_method((char*)"get_params", &KomputeModelML::get_params);
}
}

View file

@ -1,19 +1,20 @@
#pragma once
#include <Array.hpp>
#include <Godot.hpp>
#include <Node2D.hpp>
#include <Array.hpp>
#include <memory>
#include "kompute/Kompute.hpp"
namespace godot {
class KomputeModelML : public Node2D {
private:
class KomputeModelML : public Node2D
{
private:
GODOT_CLASS(KomputeModelML, Node2D);
public:
public:
KomputeModelML();
void train(Array y, Array xI, Array xJ);
@ -27,7 +28,7 @@ public:
static void _register_methods();
private:
private:
std::shared_ptr<kp::Tensor> mWeights;
std::shared_ptr<kp::Tensor> mBias;
};

16
examples/logistic_regression/src/Main.cpp Executable file → Normal file
View file

@ -5,7 +5,8 @@
#include "kompute/Kompute.hpp"
int main()
int
main()
{
#if KOMPUTE_ENABLE_SPDLOG
spdlog::set_level(
@ -36,16 +37,18 @@ int main()
bIn, bOut, lOut };
std::vector<uint32_t> spirv(
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
kp::shader_data::
shaders_glsl_logisticregression_comp_spv_len));
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
std::shared_ptr<kp::Sequence> sq =
mgr.sequence()
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
@ -67,4 +70,3 @@ int main()
std::cout << "w2: " << wIn->data()[1] << std::endl;
std::cout << "b: " << bIn->data()[0] << std::endl;
}

View file

@ -1,15 +1,15 @@
#pragma once
#include "kompute/shaders/shaderopmult.hpp"
#include "kompute/shaders/shaderlogisticregression.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Manager.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/operations/OpMemoryBarrier.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/operations/OpTensorCopy.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpMult.hpp"
#include "kompute/Sequence.hpp"
#include "kompute/Manager.hpp"
#include "kompute/shaders/shaderlogisticregression.hpp"
#include "kompute/shaders/shaderopmult.hpp"

1280
single_include/kompute/Kompute.hpp Executable file → Normal file

File diff suppressed because it is too large Load diff

View file

@ -305,7 +305,8 @@ Algorithm::createPipeline()
this->mFreePipeline = true;
#else
vk::Pipeline pipeline =
this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo).value;
this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo)
.value;
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
this->mFreePipeline = true;
#endif

View file

@ -291,7 +291,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
// Getting an integer that says how many Vulkan devices we have
std::vector<vk::PhysicalDevice> physicalDevices =
this->mInstance->enumeratePhysicalDevices();
this->mInstance->enumeratePhysicalDevices();
uint32_t deviceCount = physicalDevices.size();
// This means there are no devices at all

View file

@ -345,33 +345,55 @@ class TensorT : public Tensor
/**
 * fmt formatter for kp::Tensor::TensorDataTypes.
 *
 * Renders the enumerator as its identifier ("eBool", "eDouble", "eFloat",
 * "eInt" or "eUnsignedInt"); any other value is rendered as "unknown". The
 * resulting text is handed to the std::string formatter this type derives
 * from.
 */
template<>
struct fmt::formatter<kp::Tensor::TensorDataTypes> : formatter<std::string>
{
    template<typename FormatContext>
    auto format(kp::Tensor::TensorDataTypes dt, FormatContext& ctx)
    {
        using DataType = kp::Tensor::TensorDataTypes;

        std::string label;
        switch (dt) {
            case DataType::eBool:
                label = "eBool";
                break;
            case DataType::eDouble:
                label = "eDouble";
                break;
            case DataType::eFloat:
                label = "eFloat";
                break;
            case DataType::eInt:
                label = "eInt";
                break;
            case DataType::eUnsignedInt:
                label = "eUnsignedInt";
                break;
            default:
                // Value outside the enumerators listed above.
                label = "unknown";
                break;
        }
        return formatter<std::string>::format(label, ctx);
    }
};
/**
 * fmt formatter for kp::Tensor::TensorTypes.
 *
 * Renders the enumerator as its identifier ("eDevice", "eHost" or
 * "eStorage"); any other value is rendered as "unknown". The resulting text
 * is handed to the std::string formatter this type derives from.
 */
template<>
struct fmt::formatter<kp::Tensor::TensorTypes> : formatter<std::string>
{
    template<typename FormatContext>
    auto format(kp::Tensor::TensorTypes dt, FormatContext& ctx)
    {
        using TensorType = kp::Tensor::TensorTypes;

        std::string label;
        switch (dt) {
            case TensorType::eDevice:
                label = "eDevice";
                break;
            case TensorType::eHost:
                label = "eHost";
                break;
            case TensorType::eStorage:
                label = "eStorage";
                break;
            default:
                // Value outside the enumerators listed above.
                label = "unknown";
                break;
        }
        return formatter<std::string>::format(label, ctx);
    }
};

View file

@ -1,15 +1,15 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
@ -17,7 +17,6 @@ namespace kp {
class OpAlgoDispatch : public OpBase
{
public:
/**
* Constructor that stores the algorithm to use as well as the relevant
* push constants to override when recording.
@ -27,7 +26,7 @@ class OpAlgoDispatch : public OpBase
*/
template<typename T = float>
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
const std::vector<T>& pushConstants = {})
const std::vector<T>& pushConstants = {})
{
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
@ -76,7 +75,7 @@ class OpAlgoDispatch : public OpBase
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
private:
private:
// -------------- ALWAYS OWNED RESOURCES
std::shared_ptr<Algorithm> mAlgorithm;
void* mPushConstantsData = nullptr;
@ -85,4 +84,3 @@ private:
};
} // End namespace kp

View file

@ -1,9 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/Algorithm.hpp"
namespace kp {
@ -18,16 +18,12 @@ namespace kp {
class OpBase
{
public:
/**
 * Virtual destructor, so that operations derived from OpBase are destroyed
 * through their own destructor when deleted via an OpBase pointer. The base
 * implementation itself only emits a debug log message.
 */
virtual ~OpBase()
{
    KP_LOG_DEBUG("Kompute OpBase destructor started");
}
/**
* The record function is intended to only send a record command or run
@ -39,24 +35,24 @@ class OpBase
virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
/**
* Pre eval is called before the Sequence has called eval and submitted the commands to
* the GPU for processing, and can be used to perform any per-eval setup steps
* required as the computation iteration begins. It's worth noting that
* there are situations where eval can be called multiple times, so the
* resources that are created should be idempotent in case it's called multiple
* times in a row.
* Pre eval is called before the Sequence has called eval and submitted the
* commands to the GPU for processing, and can be used to perform any
* per-eval setup steps required as the computation iteration begins. It's
* worth noting that there are situations where eval can be called multiple
* times, so the resources that are created should be idempotent in case
* it's called multiple times in a row.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
/**
* Post eval is called after the Sequence has called eval and submitted the commands to
* the GPU for processing, and can be used to perform any tear-down steps
* required as the computation iteration finishes. It's worth noting that
* there are situations where eval can be called multiple times, so the
* resources that are destroyed should not require a re-init unless explicitly
* provided by the user.
* Post eval is called after the Sequence has called eval and submitted the
* commands to the GPU for processing, and can be used to perform any
* tear-down steps required as the computation iteration finishes. It's
* worth noting that there are situations where eval can be called multiple
* times, so the resources that are destroyed should not require a re-init
* unless explicitly provided by the user.
*
* @param commandBuffer The command buffer to record the command into.
*/

View file

@ -1,15 +1,15 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* It exposes the pipeline barrier functionality specifically for memory
* barriers that can be configured through the respective source and destination
@ -18,29 +18,32 @@ namespace kp {
class OpMemoryBarrier : public OpBase
{
public:
/**
* Constructor that stores tensors as well as memory barrier parameters to be
* used to create a pipeline barrier on the respective primary or staging tensor.
* Constructor that stores tensors as well as memory barrier parameters to
* be used to create a pipeline barrier on the respective primary or staging
* tensor.
*
* @param tensors The tensors to apply the memory barriers on
* @param srcAccessMask The kp::AccessFlagBits for the source access mask
* @param dstAccessMask The kp::AccessFlagBits for the destination access mask
* @param srcStageMask The kp::PipelineStageFlagBits for the source stage mask
* @param dstStageMask The kp::PipelineStageFlagBits for the destination stage mask
* @param barrierOnPrimary Boolean to select primary or secondary buffers on tensors
* @param dstAccessMask The kp::AccessFlagBits for the destination access
* mask
* @param srcStageMask The kp::PipelineStageFlagBits for the source stage
* mask
* @param dstStageMask The kp::PipelineStageFlagBits for the destination
* stage mask
* @param barrierOnPrimary Boolean to select primary or secondary buffers on
* tensors
*/
OpMemoryBarrier(
const std::vector<std::shared_ptr<Tensor>>& tensors,
const vk::AccessFlagBits& srcAccessMask,
const vk::AccessFlagBits& dstAccessMask,
const vk::PipelineStageFlagBits& srcStageMask,
const vk::PipelineStageFlagBits& dstStageMask,
bool barrierOnPrimary = true);
OpMemoryBarrier(const std::vector<std::shared_ptr<Tensor>>& tensors,
const vk::AccessFlagBits& srcAccessMask,
const vk::AccessFlagBits& dstAccessMask,
const vk::PipelineStageFlagBits& srcStageMask,
const vk::PipelineStageFlagBits& dstStageMask,
bool barrierOnPrimary = true);
/**
* Default destructor, which is in charge of destroying the reference to the tensors
* and all the relevant access / stage masks created
* Default destructor, which is in charge of destroying the reference to the
* tensors and all the relevant access / stage masks created
*/
virtual ~OpMemoryBarrier() override;
@ -66,7 +69,7 @@ class OpMemoryBarrier : public OpBase
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
private:
private:
const vk::AccessFlagBits mSrcAccessMask;
const vk::AccessFlagBits mDstAccessMask;
const vk::PipelineStageFlagBits mSrcStageMask;
@ -76,4 +79,3 @@ private:
};
} // End namespace kp

View file

@ -21,7 +21,6 @@ namespace kp {
class OpMult : public OpAlgoDispatch
{
public:
/**
* Default constructor with parameters that provides the bare minimum
* requirements for the operations to be able to create and manage their
@ -31,19 +30,21 @@ class OpMult : public OpAlgoDispatch
* @param algorithm An algorithm that will be overridden with the OpMult
* shader data and the tensors provided which are expected to be 3
*/
OpMult(std::vector<std::shared_ptr<Tensor>> tensors,
       std::shared_ptr<Algorithm> algorithm)
  : OpAlgoDispatch(algorithm)
{
    KP_LOG_DEBUG("Kompute OpMult constructor with params");

    // Exactly three tensors are accepted; anything else is rejected before
    // the algorithm is touched.
    if (tensors.size() != 3) {
        // The count must be converted with std::to_string: adding an
        // integer to a string literal offsets the pointer instead of
        // appending the number to the message.
        throw std::runtime_error(
          "Kompute OpMult expected 3 tensors but got " +
          std::to_string(tensors.size()));
    }

    // Copy the embedded OpMult shader bytes into a vector of 32-bit words.
    std::vector<uint32_t> spirv(
      (uint32_t*)shader_data::shaders_glsl_opmult_comp_spv,
      (uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
                  kp::shader_data::shaders_glsl_opmult_comp_spv_len));

    // Rebuild the supplied algorithm with the given tensors and that shader.
    algorithm->rebuild<>(tensors, spirv);
}
@ -52,7 +53,8 @@ class OpMult : public OpAlgoDispatch
* Default destructor, which is in charge of destroying the algorithm
* components but does not destroy the underlying tensors
*/
virtual ~OpMult() override { KP_LOG_DEBUG("Kompute OpMult destructor started"); }
};

View file

@ -10,30 +10,30 @@
namespace kp {
/**
* Operation that copies the data from the first tensor to the rest of the tensors
* provided, using a record command for all the vectors. This operation does not
* own/manage the memory of the tensors passed to it. The operation must only
* receive tensors of type
*/
* Operation that copies the data from the first tensor to the rest of the
* tensors provided, using a record command for all the vectors. This operation
* does not own/manage the memory of the tensors passed to it. The operation
* must only receive tensors of type
*/
class OpTensorCopy : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation.
* Default constructor with parameters that provides the core vulkan
* resources and the tensors that will be used in the operation.
*
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpTensorCopy() override;
/**
* Records the copy commands from the first tensor into all the other
* Records the copy commands from the first tensor into all the other
* tensors provided. Also optionally records a barrier.
*
* @param commandBuffer The command buffer to record the command into.
@ -48,7 +48,8 @@ class OpTensorCopy : public OpBase
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* Copies the local vectors for all the tensors to sync the data with the gpu.
* Copies the local vectors for all the tensors to sync the data with the
* gpu.
*
* @param commandBuffer The command buffer to record the command into.
*/
@ -60,4 +61,3 @@ class OpTensorCopy : public OpBase
};
} // End namespace kp

View file

@ -2,39 +2,40 @@
#pragma once
#include "kompute/Core.hpp"
#include "kompute/operations/OpBase.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that syncs tensor's device by mapping local data into the device memory.
* For TensorTypes::eDevice it will use a record operation for the memory to be syncd
* into GPU memory which means that the operation will be done in sync with GPU commands.
* For TensorTypes::eHost it will only map the data into host memory which will
* happen during preEval before the recorded commands are dispatched.
*/
* Operation that syncs tensor's device by mapping local data into the device
* memory. For TensorTypes::eDevice it will use a record operation for the
* memory to be syncd into GPU memory which means that the operation will be
* done in sync with GPU commands. For TensorTypes::eHost it will only map the
* data into host memory which will happen during preEval before the recorded
* commands are dispatched.
*/
class OpTensorSyncDevice : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation. The tensors provided cannot
* be of type TensorTypes::eStorage.
* Default constructor with parameters that provides the core vulkan
* resources and the tensors that will be used in the operation. The tensors
* provided cannot be of type TensorTypes::eStorage.
*
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpTensorSyncDevice() override;
/**
* For device tensors, it records the copy command for the tensor to copy the
* data from its staging to device memory.
* For device tensors, it records the copy command for the tensor to copy
* the data from its staging to device memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
@ -60,5 +61,3 @@ class OpTensorSyncDevice : public OpBase
};
} // End namespace kp

View file

@ -10,34 +10,34 @@
namespace kp {
/**
* Operation that syncs tensor's local memory by mapping device data into the
* local CPU memory. For TensorTypes::eDevice it will use a record operation
* for the memory to be syncd into GPU memory which means that the operation
* will be done in sync with GPU commands. For TensorTypes::eHost it will
* only map the data into host memory which will happen during preEval before
* Operation that syncs tensor's local memory by mapping device data into the
* local CPU memory. For TensorTypes::eDevice it will use a record operation
* for the memory to be syncd into GPU memory which means that the operation
* will be done in sync with GPU commands. For TensorTypes::eHost it will
* only map the data into host memory which will happen during preEval before
* the recorded commands are dispatched.
*/
*/
class OpTensorSyncLocal : public OpBase
{
public:
/**
* Default constructor with parameters that provides the core vulkan resources
* and the tensors that will be used in the operation. The tensors provided
* cannot be of type TensorTypes::eStorage.
* Default constructor with parameters that provides the core vulkan
* resources and the tensors that will be used in the operation. The tensors
* provided cannot be of type TensorTypes::eStorage.
*
* @param tensors Tensors that will be used to create in operation.
*/
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
/**
* Default destructor. This class does not manage memory so it won't be expecting
* the parent to perform a release.
* Default destructor. This class does not manage memory so it won't be
* expecting the parent to perform a release.
*/
~OpTensorSyncLocal() override;
/**
* For device tensors, it records the copy command for the tensor to copy the
* data from its device to staging memory.
* For device tensors, it records the copy command for the tensor to copy
* the data from its device to staging memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
@ -51,19 +51,16 @@ class OpTensorSyncLocal : public OpBase
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
/**
* For host tensors it performs the map command from the host memory into local memory.
* For host tensors it performs the map command from the host memory into
* local memory.
*
* @param commandBuffer The command buffer to record the command into.
*/
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -27,16 +27,15 @@ TEST(TestDestroy, TestDestroyTensorSingle)
{
kp::Manager mgr;
const std::vector<float> initialValues = {0.0f, 0.0f, 0.0f};
const std::vector<float> initialValues = { 0.0f, 0.0f, 0.0f };
tensorA = mgr.tensor(initialValues);
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm({tensorA}, spirv);
mgr.algorithm({ tensorA }, spirv);
// Sync values to and from device
mgr.sequence()
->eval<kp::OpTensorSyncDevice>(algo->getTensors());
mgr.sequence()->eval<kp::OpTensorSyncDevice>(algo->getTensors());
EXPECT_EQ(tensorA->vector(), initialValues);
@ -45,7 +44,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
->eval()
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
const std::vector<float> expectedFinalValues = {1.0f, 1.0f, 1.0f};
const std::vector<float> expectedFinalValues = { 1.0f, 1.0f, 1.0f };
EXPECT_EQ(tensorA->vector(), expectedFinalValues);
tensorA->destroy();

View file

@ -2,31 +2,34 @@
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>
/**
 * Compile a single glsl shader given as a string into SPIR-V by shelling out
 * to the glslangValidator CLI. This is only meant to be used for testing: it
 * is not threadsafe, because every call writes to and reads from the same
 * fixed file names (tmp_kp_shader.comp / tmp_kp_shader.comp.spv) in the
 * current working directory. The CLI is used instead of linking glslang due
 * to license issues: see https://github.com/KomputeProject/kompute/pull/235
 *
 * @param source An individual raw glsl shader in string format
 * @return The compiled SPIR-V binary in unsigned int32 format
 * @throws std::runtime_error if the shader file cannot be written, if the
 * glslangValidator command returns non-zero, or if its output cannot be
 * opened or is not a whole number of 32-bit words
 */
static std::vector<uint32_t>
compileSource(const std::string& source)
{
    // Write the shader where the command below expects to find it, and make
    // sure it really reached the file before invoking the compiler.
    std::ofstream fileOut("tmp_kp_shader.comp");
    fileOut << source;
    fileOut.close();
    if (!fileOut)
        throw std::runtime_error("Error writing tmp_kp_shader.comp");

    if (std::system(
          "glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv"))
        throw std::runtime_error("Error running glslangValidator command");

    std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
    if (!fileStream)
        throw std::runtime_error("Error opening tmp_kp_shader.comp.spv");

    // Extra parentheses keep this from being parsed as a function declaration.
    const std::vector<char> buffer((std::istreambuf_iterator<char>(fileStream)),
                                   std::istreambuf_iterator<char>());

    // A trailing partial word cannot be represented in the result.
    if (buffer.size() % sizeof(uint32_t) != 0)
        throw std::runtime_error(
          "tmp_kp_shader.comp.spv is not a whole number of 32-bit words");

    // Copy the bytes rather than reinterpreting the char buffer as
    // uint32_t*, which would read the data through an incompatible type.
    std::vector<uint32_t> spirv(buffer.size() / sizeof(uint32_t));
    if (!buffer.empty())
        std::memcpy(spirv.data(), buffer.data(), buffer.size());
    return spirv;
}