Merge pull request #286 from COM8/clang-format
Added .clang-format file and formatted everything
This commit is contained in:
commit
de46d30678
35 changed files with 1198 additions and 1051 deletions
5
.clang-format
Normal file
5
.clang-format
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
BasedOnStyle: Mozilla
|
||||
IndentWidth: 4
|
||||
|
||||
...
|
||||
4
Makefile
4
Makefile
|
|
@ -198,7 +198,9 @@ win_build_xxd:
|
|||
cd external/bin/ && gcc.exe -o xxd.exe xxd.c -DCYGWIN
|
||||
|
||||
format:
|
||||
$(CLANG_FORMAT_BIN) -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp test/*cpp
|
||||
for val in "examples single_include src test" ; do \
|
||||
find $$val -depth -iname *.h -or -iname *.c -or -iname *.hpp -or -iname *.cpp | grep -v "shaders" | xargs $(CLANG_FORMAT_BIN) -style=file -i; \
|
||||
done
|
||||
|
||||
static_scan:
|
||||
cppcheck --project=build/compile_commands.json -iexternal/
|
||||
|
|
|
|||
|
|
@ -12,17 +12,16 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
// Includes the JNI utilities for Android to be able to create the
|
||||
// relevant bindings for Java, including JNIEXPORT, JNICALL, and
|
||||
// Includes the JNI utilities for Android to be able to create the
|
||||
// relevant bindings for Java, including JNIEXPORT, JNICALL, and
|
||||
// other "j-variables".
|
||||
#include <jni.h>
|
||||
|
||||
// The ML class exposing the Kompute ML workflow for training and
|
||||
// The ML class exposing the Kompute ML workflow for training and
|
||||
// prediction of inference data.
|
||||
#include "KomputeModelML.hpp"
|
||||
|
||||
// Allows us to use the POSIX sleep function to wait when loading the
|
||||
// Allows us to use the POSIX sleep function to wait when loading the
|
||||
// Vulkan library in android
|
||||
#include <unistd.h>
|
||||
|
||||
|
|
@ -30,86 +29,92 @@
|
|||
#define KOMPUTE_VK_INIT_RETRIES 5
|
||||
#endif
|
||||
|
||||
static std::vector<float> jfloatArrayToVector(JNIEnv *env, const jfloatArray & fromArray) {
|
||||
float *inCArray = env->GetFloatArrayElements(fromArray, NULL);
|
||||
if (NULL == inCArray) return std::vector<float>();
|
||||
static std::vector<float>
|
||||
jfloatArrayToVector(JNIEnv* env, const jfloatArray& fromArray)
|
||||
{
|
||||
float* inCArray = env->GetFloatArrayElements(fromArray, NULL);
|
||||
if (NULL == inCArray)
|
||||
return std::vector<float>();
|
||||
int32_t length = env->GetArrayLength(fromArray);
|
||||
|
||||
std::vector<float> outVector(inCArray, inCArray + length);
|
||||
return outVector;
|
||||
}
|
||||
|
||||
static jfloatArray vectorToJFloatArray(JNIEnv *env, const std::vector<float> & fromVector) {
|
||||
static jfloatArray
|
||||
vectorToJFloatArray(JNIEnv* env, const std::vector<float>& fromVector)
|
||||
{
|
||||
jfloatArray ret = env->NewFloatArray(fromVector.size());
|
||||
if (NULL == ret) return NULL;
|
||||
if (NULL == ret)
|
||||
return NULL;
|
||||
env->SetFloatArrayRegion(ret, 0, fromVector.size(), fromVector.data());
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
extern "C"
|
||||
{
|
||||
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_initVulkan(JNIEnv *env, jobject thiz) {
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_initVulkan(JNIEnv* env, jobject thiz)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Initialising vulkan");
|
||||
KP_LOG_INFO("Initialising vulkan");
|
||||
|
||||
uint32_t totalRetries = 0;
|
||||
uint32_t totalRetries = 0;
|
||||
|
||||
while (totalRetries < KOMPUTE_VK_INIT_RETRIES) {
|
||||
KP_LOG_INFO("VULKAN LOAD TRY NUMBER: %u", totalRetries);
|
||||
if(InitVulkan()) {
|
||||
break;
|
||||
while (totalRetries < KOMPUTE_VK_INIT_RETRIES) {
|
||||
KP_LOG_INFO("VULKAN LOAD TRY NUMBER: %u", totalRetries);
|
||||
if (InitVulkan()) {
|
||||
break;
|
||||
}
|
||||
sleep(1);
|
||||
totalRetries++;
|
||||
}
|
||||
sleep(1);
|
||||
totalRetries++;
|
||||
|
||||
return totalRetries < KOMPUTE_VK_INIT_RETRIES;
|
||||
}
|
||||
|
||||
return totalRetries < KOMPUTE_VK_INIT_RETRIES;
|
||||
}
|
||||
|
||||
|
||||
JNIEXPORT jfloatArray JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_kompute(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jfloatArray xiJFloatArr,
|
||||
jfloatArray xjJFloatArr,
|
||||
jfloatArray yJFloatArr) {
|
||||
|
||||
KP_LOG_INFO("Creating manager");
|
||||
|
||||
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
|
||||
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
|
||||
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
|
||||
|
||||
KomputeModelML kml;
|
||||
kml.train(yVector, xiVector, xjVector);
|
||||
|
||||
std::vector<float> pred = kml.predict(xiVector, xjVector);
|
||||
|
||||
return vectorToJFloatArray(env, pred);
|
||||
}
|
||||
|
||||
JNIEXPORT jfloatArray JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_komputeParams(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jfloatArray xiJFloatArr,
|
||||
jfloatArray xjJFloatArr,
|
||||
jfloatArray yJFloatArr) {
|
||||
|
||||
KP_LOG_INFO("Creating manager");
|
||||
|
||||
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
|
||||
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
|
||||
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
|
||||
|
||||
KomputeModelML kml;
|
||||
kml.train(yVector, xiVector, xjVector);
|
||||
|
||||
std::vector<float> params = kml.get_params();
|
||||
|
||||
return vectorToJFloatArray(env, params);
|
||||
}
|
||||
|
||||
JNIEXPORT jfloatArray JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_kompute(JNIEnv* env,
|
||||
jobject thiz,
|
||||
jfloatArray xiJFloatArr,
|
||||
jfloatArray xjJFloatArr,
|
||||
jfloatArray yJFloatArr)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Creating manager");
|
||||
|
||||
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
|
||||
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
|
||||
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
|
||||
|
||||
KomputeModelML kml;
|
||||
kml.train(yVector, xiVector, xjVector);
|
||||
|
||||
std::vector<float> pred = kml.predict(xiVector, xjVector);
|
||||
|
||||
return vectorToJFloatArray(env, pred);
|
||||
}
|
||||
|
||||
JNIEXPORT jfloatArray JNICALL
|
||||
Java_com_ethicalml_kompute_KomputeJni_komputeParams(JNIEnv* env,
|
||||
jobject thiz,
|
||||
jfloatArray xiJFloatArr,
|
||||
jfloatArray xjJFloatArr,
|
||||
jfloatArray yJFloatArr)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Creating manager");
|
||||
|
||||
std::vector<float> xiVector = jfloatArrayToVector(env, xiJFloatArr);
|
||||
std::vector<float> xjVector = jfloatArrayToVector(env, xjJFloatArr);
|
||||
std::vector<float> yVector = jfloatArrayToVector(env, yJFloatArr);
|
||||
|
||||
KomputeModelML kml;
|
||||
kml.train(yVector, xiVector, xjVector);
|
||||
|
||||
std::vector<float> params = kml.get_params();
|
||||
|
||||
return vectorToJFloatArray(env, params);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
43
examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
Executable file → Normal file
43
examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp
Executable file → Normal file
|
|
@ -1,15 +1,15 @@
|
|||
|
||||
#include "KomputeModelML.hpp"
|
||||
|
||||
KomputeModelML::KomputeModelML() {
|
||||
KomputeModelML::KomputeModelML() {}
|
||||
|
||||
}
|
||||
KomputeModelML::~KomputeModelML() {}
|
||||
|
||||
KomputeModelML::~KomputeModelML() {
|
||||
|
||||
}
|
||||
|
||||
void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData, std::vector<float> xJData) {
|
||||
void
|
||||
KomputeModelML::train(std::vector<float> yData,
|
||||
std::vector<float> xIData,
|
||||
std::vector<float> xJData)
|
||||
{
|
||||
|
||||
std::vector<float> zerosData;
|
||||
|
||||
|
|
@ -42,17 +42,19 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
|
|||
bIn, bOut, lOut };
|
||||
|
||||
std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algorithm = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algorithm)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
|
@ -79,7 +81,9 @@ void KomputeModelML::train(std::vector<float> yData, std::vector<float> xIData,
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ) {
|
||||
std::vector<float>
|
||||
KomputeModelML::predict(std::vector<float> xI, std::vector<float> xJ)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Running prediction inference");
|
||||
|
||||
|
|
@ -93,9 +97,8 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
|
|||
for (size_t i = 0; i < xI.size(); i++) {
|
||||
float xIVal = xI[i];
|
||||
float xJVal = xJ[i];
|
||||
float result = (xIVal * this->mWeights[0]
|
||||
+ xJVal * this->mWeights[1]
|
||||
+ this->mBias[0]);
|
||||
float result = (xIVal * this->mWeights[0] + xJVal * this->mWeights[1] +
|
||||
this->mBias[0]);
|
||||
|
||||
// Instead of using sigmoid we'll just return full numbers
|
||||
float var = result > 0 ? 1 : 0;
|
||||
|
|
@ -107,13 +110,15 @@ std::vector<float> KomputeModelML::predict(std::vector<float> xI, std::vector<fl
|
|||
return retVector;
|
||||
}
|
||||
|
||||
std::vector<float> KomputeModelML::get_params() {
|
||||
std::vector<float>
|
||||
KomputeModelML::get_params()
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Displaying results");
|
||||
|
||||
std::vector<float> retVector;
|
||||
|
||||
if(this->mWeights.size() + this->mBias.size() == 0) {
|
||||
if (this->mWeights.size() + this->mBias.size() == 0) {
|
||||
return retVector;
|
||||
}
|
||||
|
||||
|
|
|
|||
18
examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp
Executable file → Normal file
18
examples/android/android-simple/app/src/main/cpp/KomputeModelML.hpp
Executable file → Normal file
|
|
@ -2,28 +2,30 @@
|
|||
#ifndef KOMPUTEMODELML_HPP
|
||||
#define KOMPUTEMODELML_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
class KomputeModelML {
|
||||
class KomputeModelML
|
||||
{
|
||||
|
||||
public:
|
||||
public:
|
||||
KomputeModelML();
|
||||
virtual ~KomputeModelML();
|
||||
|
||||
void train(std::vector<float> yData, std::vector<float> xIData, std::vector<float> xJData);
|
||||
void train(std::vector<float> yData,
|
||||
std::vector<float> xIData,
|
||||
std::vector<float> xJData);
|
||||
|
||||
std::vector<float> predict(std::vector<float> xI, std::vector<float> xJ);
|
||||
|
||||
std::vector<float> get_params();
|
||||
|
||||
private:
|
||||
private:
|
||||
std::vector<float> mWeights;
|
||||
std::vector<float> mBias;
|
||||
|
||||
};
|
||||
|
||||
static std::string LR_SHADER = R"(
|
||||
|
|
@ -83,4 +85,4 @@ void main() {
|
|||
}
|
||||
)";
|
||||
|
||||
#endif //KOMPUTEMODELML_HPP
|
||||
#endif // KOMPUTEMODELML_HPP
|
||||
|
|
|
|||
44
examples/array_multiplication/src/Main.cpp
Executable file → Normal file
44
examples/array_multiplication/src/Main.cpp
Executable file → Normal file
|
|
@ -5,23 +5,27 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
static
|
||||
std::vector<uint32_t>
|
||||
compileSource(
|
||||
const std::string& source)
|
||||
static std::vector<uint32_t>
|
||||
compileSource(const std::string& source)
|
||||
{
|
||||
std::ofstream fileOut("tmp_kp_shader.comp");
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(
|
||||
std::string(
|
||||
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
|
||||
.c_str()))
|
||||
throw std::runtime_error("Error running glslangValidator command");
|
||||
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
||||
std::vector<char> buffer;
|
||||
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
||||
buffer.insert(
|
||||
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return { (uint32_t*)buffer.data(),
|
||||
(uint32_t*)(buffer.data() + buffer.size()) };
|
||||
}
|
||||
|
||||
int main()
|
||||
int
|
||||
main()
|
||||
{
|
||||
#if KOMPUTE_ENABLE_SPDLOG
|
||||
spdlog::set_level(
|
||||
|
|
@ -53,21 +57,23 @@ int main()
|
|||
}
|
||||
)");
|
||||
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA, tensorInB, tensorOut };
|
||||
std::vector<std::shared_ptr<kp::Tensor>> params = { tensorInA,
|
||||
tensorInB,
|
||||
tensorOut };
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, compileSource(shader));
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm(params, compileSource(shader));
|
||||
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
->record<kp::OpTensorSyncDevice>(params)
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>(params)
|
||||
->eval();
|
||||
|
||||
// prints "Output: { 0 4 12 }"
|
||||
std::cout<< "Output: { ";
|
||||
std::cout << "Output: { ";
|
||||
for (const float& elem : tensorOut->vector()) {
|
||||
std::cout << elem << " ";
|
||||
std::cout << elem << " ";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,55 +4,63 @@
|
|||
|
||||
#include "KomputeSummatorNode.h"
|
||||
|
||||
static
|
||||
std::vector<uint32_t>
|
||||
compileSource(
|
||||
const std::string& source)
|
||||
static std::vector<uint32_t>
|
||||
compileSource(const std::string& source)
|
||||
{
|
||||
std::ofstream fileOut("tmp_kp_shader.comp");
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(
|
||||
std::string(
|
||||
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
|
||||
.c_str()))
|
||||
throw std::runtime_error("Error running glslangValidator command");
|
||||
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
||||
std::vector<char> buffer;
|
||||
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
||||
buffer.insert(
|
||||
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return { (uint32_t*)buffer.data(),
|
||||
(uint32_t*)(buffer.data() + buffer.size()) };
|
||||
}
|
||||
|
||||
|
||||
KomputeSummatorNode::KomputeSummatorNode() {
|
||||
KomputeSummatorNode::KomputeSummatorNode()
|
||||
{
|
||||
this->_init();
|
||||
}
|
||||
|
||||
void KomputeSummatorNode::add(float value) {
|
||||
void
|
||||
KomputeSummatorNode::add(float value)
|
||||
{
|
||||
// Set the new data in the local device
|
||||
this->mSecondaryTensor->setData({value});
|
||||
this->mSecondaryTensor->setData({ value });
|
||||
// Execute recorded sequence
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
|
||||
sq->eval();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
throw std::runtime_error("Sequence pointer no longer available");
|
||||
}
|
||||
}
|
||||
|
||||
void KomputeSummatorNode::reset() {
|
||||
}
|
||||
void
|
||||
KomputeSummatorNode::reset()
|
||||
{}
|
||||
|
||||
float KomputeSummatorNode::get_total() const {
|
||||
float
|
||||
KomputeSummatorNode::get_total() const
|
||||
{
|
||||
return this->mPrimaryTensor->data()[0];
|
||||
}
|
||||
|
||||
void KomputeSummatorNode::_init() {
|
||||
void
|
||||
KomputeSummatorNode::_init()
|
||||
{
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSequence = this->mManager.sequence();
|
||||
|
||||
// We now record the steps in the sequence
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence)
|
||||
{
|
||||
if (std::shared_ptr<kp::Sequence> sq = this->mSequence) {
|
||||
|
||||
std::string shader(R"(
|
||||
#version 450
|
||||
|
|
@ -68,40 +76,38 @@ void KomputeSummatorNode::_init() {
|
|||
}
|
||||
)");
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
this->mManager.algorithm(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
compileSource(shader));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = this->mManager.algorithm(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
compileSource(shader));
|
||||
|
||||
// First we ensure secondary tensor loads to GPU
|
||||
// No need to sync the primary tensor as it should not be changed
|
||||
sq->record<kp::OpTensorSyncDevice>(
|
||||
{ this->mSecondaryTensor });
|
||||
sq->record<kp::OpTensorSyncDevice>({ this->mSecondaryTensor });
|
||||
|
||||
// Then we run the operation with both tensors
|
||||
sq->record<kp::OpAlgoDispatch>(algo);
|
||||
|
||||
// We map the result back to local
|
||||
sq->record<kp::OpTensorSyncLocal>(
|
||||
{ this->mPrimaryTensor });
|
||||
// We map the result back to local
|
||||
sq->record<kp::OpTensorSyncLocal>({ this->mPrimaryTensor });
|
||||
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
throw std::runtime_error("Sequence pointer no longer available");
|
||||
}
|
||||
}
|
||||
|
||||
void KomputeSummatorNode::_process(float delta) {
|
||||
void
|
||||
KomputeSummatorNode::_process(float delta)
|
||||
{}
|
||||
|
||||
}
|
||||
|
||||
void KomputeSummatorNode::_bind_methods() {
|
||||
ClassDB::bind_method(D_METHOD("_process", "delta"), &KomputeSummatorNode::_process);
|
||||
void
|
||||
KomputeSummatorNode::_bind_methods()
|
||||
{
|
||||
ClassDB::bind_method(D_METHOD("_process", "delta"),
|
||||
&KomputeSummatorNode::_process);
|
||||
ClassDB::bind_method(D_METHOD("_init"), &KomputeSummatorNode::_init);
|
||||
|
||||
ClassDB::bind_method(D_METHOD("add", "value"), &KomputeSummatorNode::add);
|
||||
ClassDB::bind_method(D_METHOD("reset"), &KomputeSummatorNode::reset);
|
||||
ClassDB::bind_method(D_METHOD("get_total"), &KomputeSummatorNode::get_total);
|
||||
ClassDB::bind_method(D_METHOD("get_total"),
|
||||
&KomputeSummatorNode::get_total);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,10 +6,11 @@
|
|||
|
||||
#include "scene/main/node.h"
|
||||
|
||||
class KomputeSummatorNode : public Node {
|
||||
class KomputeSummatorNode : public Node
|
||||
{
|
||||
GDCLASS(KomputeSummatorNode, Node);
|
||||
|
||||
public:
|
||||
public:
|
||||
KomputeSummatorNode();
|
||||
|
||||
void add(float value);
|
||||
|
|
@ -19,13 +20,12 @@ public:
|
|||
void _process(float delta);
|
||||
void _init();
|
||||
|
||||
protected:
|
||||
protected:
|
||||
static void _bind_methods();
|
||||
|
||||
private:
|
||||
private:
|
||||
kp::Manager mManager;
|
||||
std::shared_ptr<kp::Sequence> mSequence;
|
||||
std::shared_ptr<kp::Tensor> mPrimaryTensor;
|
||||
std::shared_ptr<kp::Tensor> mSecondaryTensor;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,17 @@
|
|||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "core/class_db.h"
|
||||
#include "KomputeSummatorNode.h"
|
||||
#include "core/class_db.h"
|
||||
|
||||
void register_kompute_summator_types() {
|
||||
void
|
||||
register_kompute_summator_types()
|
||||
{
|
||||
ClassDB::register_class<KomputeSummatorNode>();
|
||||
}
|
||||
|
||||
void unregister_kompute_summator_types() {
|
||||
// Nothing to do here in this example.
|
||||
void
|
||||
unregister_kompute_summator_types()
|
||||
{
|
||||
// Nothing to do here in this example.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
/* register_types.h */
|
||||
#pragma once
|
||||
|
||||
void register_kompute_summator_types();
|
||||
void unregister_kompute_summator_types();
|
||||
void
|
||||
register_kompute_summator_types();
|
||||
void
|
||||
unregister_kompute_summator_types();
|
||||
/* yes, the word in the middle must be the same as the module folder name */
|
||||
|
|
|
|||
|
|
@ -1,14 +1,20 @@
|
|||
#include "KomputeSummator.hpp"
|
||||
|
||||
extern "C" void GDN_EXPORT godot_gdnative_init(godot_gdnative_init_options *o) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_gdnative_init(godot_gdnative_init_options* o)
|
||||
{
|
||||
godot::Godot::gdnative_init(o);
|
||||
}
|
||||
|
||||
extern "C" void GDN_EXPORT godot_gdnative_terminate(godot_gdnative_terminate_options *o) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_gdnative_terminate(godot_gdnative_terminate_options* o)
|
||||
{
|
||||
godot::Godot::gdnative_terminate(o);
|
||||
}
|
||||
|
||||
extern "C" void GDN_EXPORT godot_nativescript_init(void *handle) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_nativescript_init(void* handle)
|
||||
{
|
||||
godot::Godot::nativescript_init(handle);
|
||||
|
||||
godot::register_class<godot::KomputeSummator>();
|
||||
|
|
|
|||
|
|
@ -1,49 +1,59 @@
|
|||
/* summator.cpp */
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "KomputeSummator.hpp"
|
||||
|
||||
static
|
||||
std::vector<uint32_t>
|
||||
compileSource(
|
||||
const std::string& source)
|
||||
static std::vector<uint32_t>
|
||||
compileSource(const std::string& source)
|
||||
{
|
||||
std::ofstream fileOut("tmp_kp_shader.comp");
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(
|
||||
std::string(
|
||||
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
|
||||
.c_str()))
|
||||
throw std::runtime_error("Error running glslangValidator command");
|
||||
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
||||
std::vector<char> buffer;
|
||||
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
||||
buffer.insert(
|
||||
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return { (uint32_t*)buffer.data(),
|
||||
(uint32_t*)(buffer.data() + buffer.size()) };
|
||||
}
|
||||
|
||||
|
||||
namespace godot {
|
||||
|
||||
KomputeSummator::KomputeSummator() {
|
||||
KomputeSummator::KomputeSummator()
|
||||
{
|
||||
std::cout << "CALLING CONSTRUCTOR" << std::endl;
|
||||
this->_init();
|
||||
}
|
||||
|
||||
void KomputeSummator::add(float value) {
|
||||
void
|
||||
KomputeSummator::add(float value)
|
||||
{
|
||||
// Set the new data in the local device
|
||||
this->mSecondaryTensor->setData({value});
|
||||
this->mSecondaryTensor->setData({ value });
|
||||
// Execute recorded sequence
|
||||
this->mSequence->eval();
|
||||
}
|
||||
|
||||
void KomputeSummator::reset() {
|
||||
}
|
||||
void
|
||||
KomputeSummator::reset()
|
||||
{}
|
||||
|
||||
float KomputeSummator::get_total() const {
|
||||
float
|
||||
KomputeSummator::get_total() const
|
||||
{
|
||||
return this->mPrimaryTensor->data()[0];
|
||||
}
|
||||
|
||||
void KomputeSummator::_init() {
|
||||
void
|
||||
KomputeSummator::_init()
|
||||
{
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
this->mPrimaryTensor = this->mManager.tensor({ 0.0 });
|
||||
this->mSecondaryTensor = this->mManager.tensor({ 0.0 });
|
||||
|
|
@ -70,33 +80,34 @@ void KomputeSummator::_init() {
|
|||
// First we ensure secondary tensor loads to GPU
|
||||
// No need to sync the primary tensor as it should not be changed
|
||||
this->mSequence->record<kp::OpTensorSyncDevice>(
|
||||
{ this->mSecondaryTensor });
|
||||
{ this->mSecondaryTensor });
|
||||
|
||||
// Then we run the operation with both tensors
|
||||
this->mSequence->record<kp::OpAlgoCreate>(
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
compileSource(shader));
|
||||
{ this->mPrimaryTensor, this->mSecondaryTensor },
|
||||
compileSource(shader));
|
||||
|
||||
// We map the result back to local
|
||||
// We map the result back to local
|
||||
this->mSequence->record<kp::OpTensorSyncLocal>(
|
||||
{ this->mPrimaryTensor });
|
||||
{ this->mPrimaryTensor });
|
||||
|
||||
this->mSequence->end();
|
||||
}
|
||||
}
|
||||
|
||||
void KomputeSummator::_process(float delta) {
|
||||
void
|
||||
KomputeSummator::_process(float delta)
|
||||
{}
|
||||
|
||||
}
|
||||
void
|
||||
KomputeSummator::_register_methods()
|
||||
{
|
||||
register_method((char*)"_process", &KomputeSummator::_process);
|
||||
register_method((char*)"_init", &KomputeSummator::_init);
|
||||
|
||||
void KomputeSummator::_register_methods() {
|
||||
register_method((char *)"_process", &KomputeSummator::_process);
|
||||
register_method((char *)"_init", &KomputeSummator::_init);
|
||||
|
||||
register_method((char *)"add", &KomputeSummator::add);
|
||||
register_method((char *)"reset", &KomputeSummator::reset);
|
||||
register_method((char *)"get_total", &KomputeSummator::get_total);
|
||||
register_method((char*)"add", &KomputeSummator::add);
|
||||
register_method((char*)"reset", &KomputeSummator::reset);
|
||||
register_method((char*)"get_total", &KomputeSummator::get_total);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,12 @@
|
|||
#include "kompute/Kompute.hpp"
|
||||
|
||||
namespace godot {
|
||||
class KomputeSummator : public Node2D {
|
||||
private:
|
||||
class KomputeSummator : public Node2D
|
||||
{
|
||||
private:
|
||||
GODOT_CLASS(KomputeSummator, Node2D);
|
||||
|
||||
public:
|
||||
public:
|
||||
KomputeSummator();
|
||||
|
||||
void add(float value);
|
||||
|
|
@ -24,7 +25,7 @@ public:
|
|||
|
||||
static void _register_methods();
|
||||
|
||||
private:
|
||||
private:
|
||||
kp::Manager mManager;
|
||||
std::shared_ptr<kp::Sequence> mSequence;
|
||||
std::shared_ptr<kp::Tensor> mPrimaryTensor;
|
||||
|
|
|
|||
|
|
@ -4,12 +4,15 @@
|
|||
|
||||
#include "KomputeModelMLNode.h"
|
||||
|
||||
KomputeModelMLNode::KomputeModelMLNode() {
|
||||
KomputeModelMLNode::KomputeModelMLNode()
|
||||
{
|
||||
std::cout << "CALLING CONSTRUCTOR" << std::endl;
|
||||
this->_init();
|
||||
}
|
||||
|
||||
void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
||||
void
|
||||
KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr)
|
||||
{
|
||||
|
||||
assert(yArr.size() == xIArr.size());
|
||||
assert(xIArr.size() == xJArr.size());
|
||||
|
|
@ -52,15 +55,19 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
|
||||
{
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
(uint32_t*)
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
|
@ -88,20 +95,22 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
}
|
||||
}
|
||||
|
||||
Array KomputeModelMLNode::predict(Array xI, Array xJ) {
|
||||
Array
|
||||
KomputeModelMLNode::predict(Array xI, Array xJ)
|
||||
{
|
||||
assert(xI.size() == xJ.size());
|
||||
|
||||
Array retArray;
|
||||
|
||||
// We run the inference in the CPU for simplicity
|
||||
// But you can also implement the inference on GPU
|
||||
// But you can also implement the inference on GPU
|
||||
// GPU implementation would speed up minibatching
|
||||
for (size_t i = 0; i < xI.size(); i++) {
|
||||
float xIVal = xI[i];
|
||||
float xJVal = xJ[i];
|
||||
float result = (xIVal * this->mWeights.data()[0]
|
||||
+ xJVal * this->mWeights.data()[1]
|
||||
+ this->mBias.data()[0]);
|
||||
float result =
|
||||
(xIVal * this->mWeights.data()[0] + xJVal * this->mWeights.data()[1] +
|
||||
this->mBias.data()[0]);
|
||||
|
||||
// Instead of using sigmoid we'll just return full numbers
|
||||
Variant var = result > 0 ? 1 : 0;
|
||||
|
|
@ -111,12 +120,14 @@ Array KomputeModelMLNode::predict(Array xI, Array xJ) {
|
|||
return retArray;
|
||||
}
|
||||
|
||||
Array KomputeModelMLNode::get_params() {
|
||||
Array
|
||||
KomputeModelMLNode::get_params()
|
||||
{
|
||||
Array retArray;
|
||||
|
||||
KP_LOG_INFO(this->mWeights.size() + this->mBias.size());
|
||||
|
||||
if(this->mWeights.size() + this->mBias.size() == 0) {
|
||||
if (this->mWeights.size() + this->mBias.size() == 0) {
|
||||
return retArray;
|
||||
}
|
||||
|
||||
|
|
@ -128,20 +139,27 @@ Array KomputeModelMLNode::get_params() {
|
|||
return retArray;
|
||||
}
|
||||
|
||||
void KomputeModelMLNode::_init() {
|
||||
void
|
||||
KomputeModelMLNode::_init()
|
||||
{
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
}
|
||||
|
||||
void KomputeModelMLNode::_process(float delta) {
|
||||
void
|
||||
KomputeModelMLNode::_process(float delta)
|
||||
{}
|
||||
|
||||
}
|
||||
|
||||
void KomputeModelMLNode::_bind_methods() {
|
||||
ClassDB::bind_method(D_METHOD("_process", "delta"), &KomputeModelMLNode::_process);
|
||||
void
|
||||
KomputeModelMLNode::_bind_methods()
|
||||
{
|
||||
ClassDB::bind_method(D_METHOD("_process", "delta"),
|
||||
&KomputeModelMLNode::_process);
|
||||
ClassDB::bind_method(D_METHOD("_init"), &KomputeModelMLNode::_init);
|
||||
|
||||
ClassDB::bind_method(D_METHOD("train", "yArr", "xIArr", "xJArr"), &KomputeModelMLNode::train);
|
||||
ClassDB::bind_method(D_METHOD("predict", "xI", "xJ"), &KomputeModelMLNode::predict);
|
||||
ClassDB::bind_method(D_METHOD("get_params"), &KomputeModelMLNode::get_params);
|
||||
ClassDB::bind_method(D_METHOD("train", "yArr", "xIArr", "xJArr"),
|
||||
&KomputeModelMLNode::train);
|
||||
ClassDB::bind_method(D_METHOD("predict", "xI", "xJ"),
|
||||
&KomputeModelMLNode::predict);
|
||||
ClassDB::bind_method(D_METHOD("get_params"),
|
||||
&KomputeModelMLNode::get_params);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,10 +6,11 @@
|
|||
|
||||
#include "scene/main/node.h"
|
||||
|
||||
class KomputeModelMLNode : public Node {
|
||||
class KomputeModelMLNode : public Node
|
||||
{
|
||||
GDCLASS(KomputeModelMLNode, Node);
|
||||
|
||||
public:
|
||||
public:
|
||||
KomputeModelMLNode();
|
||||
|
||||
void train(Array y, Array xI, Array xJ);
|
||||
|
|
@ -21,10 +22,10 @@ public:
|
|||
void _process(float delta);
|
||||
void _init();
|
||||
|
||||
protected:
|
||||
protected:
|
||||
static void _bind_methods();
|
||||
|
||||
private:
|
||||
private:
|
||||
kp::Tensor mWeights;
|
||||
kp::Tensor mBias;
|
||||
};
|
||||
|
|
@ -85,4 +86,3 @@ void main() {
|
|||
lout[idx] = calculateLoss(yHat, yCurr);
|
||||
}
|
||||
)";
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,17 @@
|
|||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "core/class_db.h"
|
||||
#include "KomputeModelMLNode.h"
|
||||
#include "core/class_db.h"
|
||||
|
||||
void register_kompute_model_ml_types() {
|
||||
void
|
||||
register_kompute_model_ml_types()
|
||||
{
|
||||
ClassDB::register_class<KomputeModelMLNode>();
|
||||
}
|
||||
|
||||
void unregister_kompute_model_ml_types() {
|
||||
// Nothing to do here in this example.
|
||||
void
|
||||
unregister_kompute_model_ml_types()
|
||||
{
|
||||
// Nothing to do here in this example.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
/* register_types.h */
|
||||
#pragma once
|
||||
|
||||
void register_kompute_model_ml_types();
|
||||
void unregister_kompute_model_ml_types();
|
||||
void
|
||||
register_kompute_model_ml_types();
|
||||
void
|
||||
unregister_kompute_model_ml_types();
|
||||
/* yes, the word in the middle must be the same as the module folder name */
|
||||
|
|
|
|||
|
|
@ -1,14 +1,20 @@
|
|||
#include "KomputeModelML.hpp"
|
||||
|
||||
extern "C" void GDN_EXPORT godot_gdnative_init(godot_gdnative_init_options *o) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_gdnative_init(godot_gdnative_init_options* o)
|
||||
{
|
||||
godot::Godot::gdnative_init(o);
|
||||
}
|
||||
|
||||
extern "C" void GDN_EXPORT godot_gdnative_terminate(godot_gdnative_terminate_options *o) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_gdnative_terminate(godot_gdnative_terminate_options* o)
|
||||
{
|
||||
godot::Godot::gdnative_terminate(o);
|
||||
}
|
||||
|
||||
extern "C" void GDN_EXPORT godot_nativescript_init(void *handle) {
|
||||
extern "C" void GDN_EXPORT
|
||||
godot_nativescript_init(void* handle)
|
||||
{
|
||||
godot::Godot::nativescript_init(handle);
|
||||
|
||||
godot::register_class<godot::KomputeModelML>();
|
||||
|
|
|
|||
|
|
@ -1,19 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "KomputeModelML.hpp"
|
||||
|
||||
namespace godot {
|
||||
|
||||
KomputeModelML::KomputeModelML() {
|
||||
KomputeModelML::KomputeModelML()
|
||||
{
|
||||
std::cout << "CALLING CONSTRUCTOR" << std::endl;
|
||||
this->_init();
|
||||
}
|
||||
|
||||
void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
||||
void
|
||||
KomputeModelML::train(Array yArr, Array xIArr, Array xJArr)
|
||||
{
|
||||
|
||||
assert(yArr.size() == xIArr.size());
|
||||
assert(xIArr.size() == xJArr.size());
|
||||
|
|
@ -56,15 +59,19 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
|
||||
{
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
(uint32_t*)
|
||||
kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv);
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
|
@ -92,20 +99,22 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) {
|
|||
}
|
||||
}
|
||||
|
||||
Array KomputeModelML::predict(Array xI, Array xJ) {
|
||||
Array
|
||||
KomputeModelML::predict(Array xI, Array xJ)
|
||||
{
|
||||
assert(xI.size() == xJ.size());
|
||||
|
||||
Array retArray;
|
||||
|
||||
// We run the inference in the CPU for simplicity
|
||||
// But you can also implement the inference on GPU
|
||||
// But you can also implement the inference on GPU
|
||||
// GPU implementation would speed up minibatching
|
||||
for (size_t i = 0; i < xI.size(); i++) {
|
||||
float xIVal = xI[i];
|
||||
float xJVal = xJ[i];
|
||||
float result = (xIVal * this->mWeights->data()[0]
|
||||
+ xJVal * this->mWeights->data()[1]
|
||||
+ this->mBias->data()[0]);
|
||||
float result =
|
||||
(xIVal * this->mWeights->data()[0] +
|
||||
xJVal * this->mWeights->data()[1] + this->mBias->data()[0]);
|
||||
|
||||
// Instead of using sigmoid we'll just return full numbers
|
||||
Variant var = result > 0 ? 1 : 0;
|
||||
|
|
@ -115,12 +124,14 @@ Array KomputeModelML::predict(Array xI, Array xJ) {
|
|||
return retArray;
|
||||
}
|
||||
|
||||
Array KomputeModelML::get_params() {
|
||||
Array
|
||||
KomputeModelML::get_params()
|
||||
{
|
||||
Array retArray;
|
||||
|
||||
KP_LOG_INFO(this->mWeights->size() + this->mBias->size());
|
||||
|
||||
if(this->mWeights->size() + this->mBias->size() == 0) {
|
||||
if (this->mWeights->size() + this->mBias->size() == 0) {
|
||||
return retArray;
|
||||
}
|
||||
|
||||
|
|
@ -132,22 +143,25 @@ Array KomputeModelML::get_params() {
|
|||
return retArray;
|
||||
}
|
||||
|
||||
void KomputeModelML::_init() {
|
||||
void
|
||||
KomputeModelML::_init()
|
||||
{
|
||||
std::cout << "CALLING INIT" << std::endl;
|
||||
}
|
||||
|
||||
void KomputeModelML::_process(float delta) {
|
||||
void
|
||||
KomputeModelML::_process(float delta)
|
||||
{}
|
||||
|
||||
}
|
||||
void
|
||||
KomputeModelML::_register_methods()
|
||||
{
|
||||
register_method((char*)"_process", &KomputeModelML::_process);
|
||||
register_method((char*)"_init", &KomputeModelML::_init);
|
||||
|
||||
void KomputeModelML::_register_methods() {
|
||||
register_method((char *)"_process", &KomputeModelML::_process);
|
||||
register_method((char *)"_init", &KomputeModelML::_init);
|
||||
|
||||
register_method((char *)"train", &KomputeModelML::train);
|
||||
register_method((char *)"predict", &KomputeModelML::predict);
|
||||
register_method((char *)"get_params", &KomputeModelML::get_params);
|
||||
register_method((char*)"train", &KomputeModelML::train);
|
||||
register_method((char*)"predict", &KomputeModelML::predict);
|
||||
register_method((char*)"get_params", &KomputeModelML::get_params);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,19 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include <Array.hpp>
|
||||
#include <Godot.hpp>
|
||||
#include <Node2D.hpp>
|
||||
#include <Array.hpp>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
namespace godot {
|
||||
class KomputeModelML : public Node2D {
|
||||
private:
|
||||
class KomputeModelML : public Node2D
|
||||
{
|
||||
private:
|
||||
GODOT_CLASS(KomputeModelML, Node2D);
|
||||
|
||||
public:
|
||||
public:
|
||||
KomputeModelML();
|
||||
|
||||
void train(Array y, Array xI, Array xJ);
|
||||
|
|
@ -27,7 +28,7 @@ public:
|
|||
|
||||
static void _register_methods();
|
||||
|
||||
private:
|
||||
private:
|
||||
std::shared_ptr<kp::Tensor> mWeights;
|
||||
std::shared_ptr<kp::Tensor> mBias;
|
||||
};
|
||||
|
|
|
|||
16
examples/logistic_regression/src/Main.cpp
Executable file → Normal file
16
examples/logistic_regression/src/Main.cpp
Executable file → Normal file
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
#include "kompute/Kompute.hpp"
|
||||
|
||||
int main()
|
||||
int
|
||||
main()
|
||||
{
|
||||
#if KOMPUTE_ENABLE_SPDLOG
|
||||
spdlog::set_level(
|
||||
|
|
@ -36,16 +37,18 @@ int main()
|
|||
bIn, bOut, lOut };
|
||||
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv
|
||||
+ kp::shader_data::shaders_glsl_logisticregression_comp_spv_len));
|
||||
(uint32_t*)kp::shader_data::shaders_glsl_logisticregression_comp_spv,
|
||||
(uint32_t*)(kp::shader_data::shaders_glsl_logisticregression_comp_spv +
|
||||
kp::shader_data::
|
||||
shaders_glsl_logisticregression_comp_spv_len));
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
params, spirv, kp::Workgroup({ 5 }), std::vector<float>({ 5.0 }));
|
||||
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(params);
|
||||
|
||||
std::shared_ptr<kp::Sequence> sq = mgr.sequence()
|
||||
std::shared_ptr<kp::Sequence> sq =
|
||||
mgr.sequence()
|
||||
->record<kp::OpTensorSyncDevice>({ wIn, bIn })
|
||||
->record<kp::OpAlgoDispatch>(algo)
|
||||
->record<kp::OpTensorSyncLocal>({ wOutI, wOutJ, bOut, lOut });
|
||||
|
|
@ -67,4 +70,3 @@ int main()
|
|||
std::cout << "w2: " << wIn->data()[1] << std::endl;
|
||||
std::cout << "b: " << bIn->data()[0] << std::endl;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
#pragma once
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
#include "kompute/shaders/shaderlogisticregression.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Manager.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/operations/OpMemoryBarrier.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/operations/OpMult.hpp"
|
||||
#include "kompute/Sequence.hpp"
|
||||
#include "kompute/Manager.hpp"
|
||||
#include "kompute/shaders/shaderlogisticregression.hpp"
|
||||
#include "kompute/shaders/shaderopmult.hpp"
|
||||
|
|
|
|||
1280
single_include/kompute/Kompute.hpp
Executable file → Normal file
1280
single_include/kompute/Kompute.hpp
Executable file → Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -305,7 +305,8 @@ Algorithm::createPipeline()
|
|||
this->mFreePipeline = true;
|
||||
#else
|
||||
vk::Pipeline pipeline =
|
||||
this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo).value;
|
||||
this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo)
|
||||
.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -291,7 +291,7 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||
|
||||
// Getting an integer that says how many Vulkan devices we have
|
||||
std::vector<vk::PhysicalDevice> physicalDevices =
|
||||
this->mInstance->enumeratePhysicalDevices();
|
||||
this->mInstance->enumeratePhysicalDevices();
|
||||
uint32_t deviceCount = physicalDevices.size();
|
||||
|
||||
// This means there are no devices at all
|
||||
|
|
|
|||
|
|
@ -345,33 +345,55 @@ class TensorT : public Tensor
|
|||
/**
|
||||
* fmt formatter for kp::Tensor::TensorDataTypes.
|
||||
*/
|
||||
template <> struct fmt::formatter<kp::Tensor::TensorDataTypes>: formatter<std::string> {
|
||||
template <typename FormatContext>
|
||||
auto format(kp::Tensor::TensorDataTypes dt, FormatContext& ctx) {
|
||||
std::string name = "unknown";
|
||||
switch (dt) {
|
||||
case kp::Tensor::TensorDataTypes::eBool: name = "eBool"; break;
|
||||
case kp::Tensor::TensorDataTypes::eDouble: name = "eDouble"; break;
|
||||
case kp::Tensor::TensorDataTypes::eFloat: name = "eFloat"; break;
|
||||
case kp::Tensor::TensorDataTypes::eInt: name = "eInt"; break;
|
||||
case kp::Tensor::TensorDataTypes::eUnsignedInt: name = "eUnsignedInt"; break;
|
||||
template<>
|
||||
struct fmt::formatter<kp::Tensor::TensorDataTypes> : formatter<std::string>
|
||||
{
|
||||
template<typename FormatContext>
|
||||
auto format(kp::Tensor::TensorDataTypes dt, FormatContext& ctx)
|
||||
{
|
||||
std::string name = "unknown";
|
||||
switch (dt) {
|
||||
case kp::Tensor::TensorDataTypes::eBool:
|
||||
name = "eBool";
|
||||
break;
|
||||
case kp::Tensor::TensorDataTypes::eDouble:
|
||||
name = "eDouble";
|
||||
break;
|
||||
case kp::Tensor::TensorDataTypes::eFloat:
|
||||
name = "eFloat";
|
||||
break;
|
||||
case kp::Tensor::TensorDataTypes::eInt:
|
||||
name = "eInt";
|
||||
break;
|
||||
case kp::Tensor::TensorDataTypes::eUnsignedInt:
|
||||
name = "eUnsignedInt";
|
||||
break;
|
||||
}
|
||||
return formatter<std::string>::format(name, ctx);
|
||||
}
|
||||
return formatter<std::string>::format(name, ctx);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* fmt formatter for kp::Tensor::TensorTypes.
|
||||
*/
|
||||
template <> struct fmt::formatter<kp::Tensor::TensorTypes>: formatter<std::string> {
|
||||
template <typename FormatContext>
|
||||
auto format(kp::Tensor::TensorTypes dt, FormatContext& ctx) {
|
||||
std::string name = "unknown";
|
||||
switch (dt) {
|
||||
case kp::Tensor::TensorTypes::eDevice: name = "eDevice"; break;
|
||||
case kp::Tensor::TensorTypes::eHost: name = "eHost"; break;
|
||||
case kp::Tensor::TensorTypes::eStorage: name = "eStorage"; break;
|
||||
template<>
|
||||
struct fmt::formatter<kp::Tensor::TensorTypes> : formatter<std::string>
|
||||
{
|
||||
template<typename FormatContext>
|
||||
auto format(kp::Tensor::TensorTypes dt, FormatContext& ctx)
|
||||
{
|
||||
std::string name = "unknown";
|
||||
switch (dt) {
|
||||
case kp::Tensor::TensorTypes::eDevice:
|
||||
name = "eDevice";
|
||||
break;
|
||||
case kp::Tensor::TensorTypes::eHost:
|
||||
name = "eHost";
|
||||
break;
|
||||
case kp::Tensor::TensorTypes::eStorage:
|
||||
name = "eStorage";
|
||||
break;
|
||||
}
|
||||
return formatter<std::string>::format(name, ctx);
|
||||
}
|
||||
return formatter<std::string>::format(name, ctx);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
|
|
@ -17,7 +17,6 @@ namespace kp {
|
|||
class OpAlgoDispatch : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructor that stores the algorithm to use as well as the relevant
|
||||
* push constants to override when recording.
|
||||
|
|
@ -27,7 +26,7 @@ class OpAlgoDispatch : public OpBase
|
|||
*/
|
||||
template<typename T = float>
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const std::vector<T>& pushConstants = {})
|
||||
const std::vector<T>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
|
|
@ -76,7 +75,7 @@ class OpAlgoDispatch : public OpBase
|
|||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
void* mPushConstantsData = nullptr;
|
||||
|
|
@ -85,4 +84,3 @@ private:
|
|||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
|
|
@ -18,16 +18,12 @@ namespace kp {
|
|||
class OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpBase()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBase destructor started");
|
||||
}
|
||||
virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); }
|
||||
|
||||
/**
|
||||
* The record function is intended to only send a record command or run
|
||||
|
|
@ -39,24 +35,24 @@ class OpBase
|
|||
virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Pre eval is called before the Sequence has called eval and submitted the commands to
|
||||
* the GPU for processing, and can be used to perform any per-eval setup steps
|
||||
* required as the computation iteration begins. It's worth noting that
|
||||
* there are situations where eval can be called multiple times, so the
|
||||
* resources that are created should be idempotent in case it's called multiple
|
||||
* times in a row.
|
||||
* Pre eval is called before the Sequence has called eval and submitted the
|
||||
* commands to the GPU for processing, and can be used to perform any
|
||||
* per-eval setup steps required as the computation iteration begins. It's
|
||||
* worth noting that there are situations where eval can be called multiple
|
||||
* times, so the resources that are created should be idempotent in case
|
||||
* it's called multiple times in a row.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Post eval is called after the Sequence has called eval and submitted the commands to
|
||||
* the GPU for processing, and can be used to perform any tear-down steps
|
||||
* required as the computation iteration finishes. It's worth noting that
|
||||
* there are situations where eval can be called multiple times, so the
|
||||
* resources that are destroyed should not require a re-init unless explicitly
|
||||
* provided by the user.
|
||||
* Post eval is called after the Sequence has called eval and submitted the
|
||||
* commands to the GPU for processing, and can be used to perform any
|
||||
* tear-down steps required as the computation iteration finishes. It's
|
||||
* worth noting that there are situations where eval can be called multiple
|
||||
* times, so the resources that are destroyed should not require a re-init
|
||||
* unless explicitly provided by the user.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* It exposes the pipeline barrier functionality specifically for memory
|
||||
* barriers that can be configured through the respective source and destination
|
||||
|
|
@ -18,29 +18,32 @@ namespace kp {
|
|||
class OpMemoryBarrier : public OpBase
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructor that stores tensors as well as memory barrier parameters to be
|
||||
* used to create a pipeline barrier on the respective primary or staging tensor.
|
||||
* Constructor that stores tensors as well as memory barrier parameters to
|
||||
* be used to create a pipeline barrier on the respective primary or staging
|
||||
* tensor.
|
||||
*
|
||||
* @param tensors The tensors to apply the memory barriers on
|
||||
* @param srcAccessMask The kp::AccessFlagBits for the source access mask
|
||||
* @param dstAccessMask The kp::AccessFlagBits for the destination access mask
|
||||
* @param srcStageMask The kp::PipelineStageFlagBits for the source stage mask
|
||||
* @param dstStageMask The kp::PipelineStageFlagBits for the destination stage mask
|
||||
* @param barrierOnPrimary Boolean to select primary or secondary buffers on tensors
|
||||
* @param dstAccessMask The kp::AccessFlagBits for the destination access
|
||||
* mask
|
||||
* @param srcStageMask The kp::PipelineStageFlagBits for the source stage
|
||||
* mask
|
||||
* @param dstStageMask The kp::PipelineStageFlagBits for the destination
|
||||
* stage mask
|
||||
* @param barrierOnPrimary Boolean to select primary or secondary buffers on
|
||||
* tensors
|
||||
*/
|
||||
OpMemoryBarrier(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const vk::AccessFlagBits& srcAccessMask,
|
||||
const vk::AccessFlagBits& dstAccessMask,
|
||||
const vk::PipelineStageFlagBits& srcStageMask,
|
||||
const vk::PipelineStageFlagBits& dstStageMask,
|
||||
bool barrierOnPrimary = true);
|
||||
OpMemoryBarrier(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const vk::AccessFlagBits& srcAccessMask,
|
||||
const vk::AccessFlagBits& dstAccessMask,
|
||||
const vk::PipelineStageFlagBits& srcStageMask,
|
||||
const vk::PipelineStageFlagBits& dstStageMask,
|
||||
bool barrierOnPrimary = true);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the reference to the tensors
|
||||
* and all the relevant access / stage masks created
|
||||
* Default destructor, which is in charge of destroying the reference to the
|
||||
* tensors and all the relevant access / stage masks created
|
||||
*/
|
||||
virtual ~OpMemoryBarrier() override;
|
||||
|
||||
|
|
@ -66,7 +69,7 @@ class OpMemoryBarrier : public OpBase
|
|||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
private:
|
||||
const vk::AccessFlagBits mSrcAccessMask;
|
||||
const vk::AccessFlagBits mDstAccessMask;
|
||||
const vk::PipelineStageFlagBits mSrcStageMask;
|
||||
|
|
@ -76,4 +79,3 @@ private:
|
|||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ namespace kp {
|
|||
class OpMult : public OpAlgoDispatch
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
|
|
@ -31,19 +30,21 @@ class OpMult : public OpAlgoDispatch
|
|||
* @param algorithm An algorithm that will be overridden with the OpMult
|
||||
* shader data and the tensors provided which are expected to be 3
|
||||
*/
|
||||
OpMult(std::vector<std::shared_ptr<Tensor>> tensors, std::shared_ptr<Algorithm> algorithm)
|
||||
: OpAlgoDispatch(algorithm)
|
||||
OpMult(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpAlgoDispatch(algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
if (tensors.size() != 3) {
|
||||
throw std::runtime_error("Kompute OpMult expected 3 tensors but got " + tensors.size());
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult expected 3 tensors but got " + tensors.size());
|
||||
}
|
||||
|
||||
std::vector<uint32_t> spirv(
|
||||
(uint32_t*)shader_data::shaders_glsl_opmult_comp_spv,
|
||||
(uint32_t*)(shader_data::shaders_glsl_opmult_comp_spv +
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
kp::shader_data::shaders_glsl_opmult_comp_spv_len));
|
||||
|
||||
algorithm->rebuild<>(tensors, spirv);
|
||||
}
|
||||
|
|
@ -52,7 +53,8 @@ class OpMult : public OpAlgoDispatch
|
|||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpMult() override {
|
||||
virtual ~OpMult() override
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult destructor started");
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -10,30 +10,30 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that copies the data from the first tensor to the rest of the tensors
|
||||
* provided, using a record command for all the vectors. This operation does not
|
||||
* own/manage the memory of the tensors passed to it. The operation must only
|
||||
* receive tensors of type
|
||||
*/
|
||||
* Operation that copies the data from the first tensor to the rest of the
|
||||
* tensors provided, using a record command for all the vectors. This operation
|
||||
* does not own/manage the memory of the tensors passed to it. The operation
|
||||
* must only receive tensors of type
|
||||
*/
|
||||
class OpTensorCopy : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources
|
||||
* and the tensors that will be used in the operation.
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation.
|
||||
*
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCopy() override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from the first tensor into all the other
|
||||
* Records the copy commands from the first tensor into all the other
|
||||
* tensors provided. Also optionally records a barrier.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
|
|
@ -48,7 +48,8 @@ class OpTensorCopy : public OpBase
|
|||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Copies the local vectors for all the tensors to sync the data with the gpu.
|
||||
* Copies the local vectors for all the tensors to sync the data with the
|
||||
* gpu.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
|
|
@ -60,4 +61,3 @@ class OpTensorCopy : public OpBase
|
|||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
|
|
|||
|
|
@ -2,39 +2,40 @@
|
|||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that syncs tensor's device by mapping local data into the device memory.
|
||||
* For TensorTypes::eDevice it will use a record operation for the memory to be syncd
|
||||
* into GPU memory which means that the operation will be done in sync with GPU commands.
|
||||
* For TensorTypes::eHost it will only map the data into host memory which will
|
||||
* happen during preEval before the recorded commands are dispatched.
|
||||
*/
|
||||
* Operation that syncs tensor's device by mapping local data into the device
|
||||
* memory. For TensorTypes::eDevice it will use a record operation for the
|
||||
* memory to be syncd into GPU memory which means that the operation will be
|
||||
* done in sync with GPU commands. For TensorTypes::eHost it will only map the
|
||||
* data into host memory which will happen during preEval before the recorded
|
||||
* commands are dispatched.
|
||||
*/
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources
|
||||
* and the tensors that will be used in the operation. The tensors provided cannot
|
||||
* be of type TensorTypes::eStorage.
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation. The tensors
|
||||
* provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release.
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the
|
||||
* data from its staging to device memory.
|
||||
* For device tensors, it records the copy command for the tensor to copy
|
||||
* the data from its staging to device memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
|
|
@ -60,5 +61,3 @@ class OpTensorSyncDevice : public OpBase
|
|||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -10,34 +10,34 @@
|
|||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that syncs tensor's local memory by mapping device data into the
|
||||
* local CPU memory. For TensorTypes::eDevice it will use a record operation
|
||||
* for the memory to be syncd into GPU memory which means that the operation
|
||||
* will be done in sync with GPU commands. For TensorTypes::eHost it will
|
||||
* only map the data into host memory which will happen during preEval before
|
||||
* Operation that syncs tensor's local memory by mapping device data into the
|
||||
* local CPU memory. For TensorTypes::eDevice it will use a record operation
|
||||
* for the memory to be syncd into GPU memory which means that the operation
|
||||
* will be done in sync with GPU commands. For TensorTypes::eHost it will
|
||||
* only map the data into host memory which will happen during preEval before
|
||||
* the recorded commands are dispatched.
|
||||
*/
|
||||
*/
|
||||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan resources
|
||||
* and the tensors that will be used in the operation. The tensors provided
|
||||
* cannot be of type TensorTypes::eStorage.
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation. The tensors
|
||||
* provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param tensors Tensors that will be used to create in operation.
|
||||
*/
|
||||
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be expecting
|
||||
* the parent to perform a release.
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy the
|
||||
* data from its device to staging memory.
|
||||
* For device tensors, it records the copy command for the tensor to copy
|
||||
* the data from its device to staging memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
|
|
@ -51,19 +51,16 @@ class OpTensorSyncLocal : public OpBase
|
|||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* For host tensors it performs the map command from the host memory into local memory.
|
||||
* For host tensors it performs the map command from the host memory into
|
||||
* local memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -27,16 +27,15 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
{
|
||||
kp::Manager mgr;
|
||||
|
||||
const std::vector<float> initialValues = {0.0f, 0.0f, 0.0f};
|
||||
const std::vector<float> initialValues = { 0.0f, 0.0f, 0.0f };
|
||||
|
||||
tensorA = mgr.tensor(initialValues);
|
||||
|
||||
std::shared_ptr<kp::Algorithm> algo =
|
||||
mgr.algorithm({tensorA}, spirv);
|
||||
mgr.algorithm({ tensorA }, spirv);
|
||||
|
||||
// Sync values to and from device
|
||||
mgr.sequence()
|
||||
->eval<kp::OpTensorSyncDevice>(algo->getTensors());
|
||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>(algo->getTensors());
|
||||
|
||||
EXPECT_EQ(tensorA->vector(), initialValues);
|
||||
|
||||
|
|
@ -45,7 +44,7 @@ TEST(TestDestroy, TestDestroyTensorSingle)
|
|||
->eval()
|
||||
->eval<kp::OpTensorSyncLocal>(algo->getTensors());
|
||||
|
||||
const std::vector<float> expectedFinalValues = {1.0f, 1.0f, 1.0f};
|
||||
const std::vector<float> expectedFinalValues = { 1.0f, 1.0f, 1.0f };
|
||||
EXPECT_EQ(tensorA->vector(), expectedFinalValues);
|
||||
|
||||
tensorA->destroy();
|
||||
|
|
|
|||
|
|
@ -2,31 +2,34 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
/**
|
||||
* Compile a single glslang source from string value. This is only meant
|
||||
* to be used for testing as it's not thread-safe, and it had to be removed
|
||||
* from the glslang dependency and now can only run the CLI directly due to
|
||||
* from the glslang dependency and now can only run the CLI directly due to
|
||||
* license issues: see https://github.com/KomputeProject/kompute/pull/235
|
||||
*
|
||||
* @param source An individual raw glsl shader in string format
|
||||
* @return The compiled SPIR-V binary in unsigned int32 format
|
||||
*/
|
||||
static
|
||||
std::vector<uint32_t>
|
||||
compileSource(
|
||||
const std::string& source)
|
||||
static std::vector<uint32_t>
|
||||
compileSource(const std::string& source)
|
||||
{
|
||||
std::ofstream fileOut("tmp_kp_shader.comp");
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
|
||||
fileOut << source;
|
||||
fileOut.close();
|
||||
if (system(
|
||||
std::string(
|
||||
"glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv")
|
||||
.c_str()))
|
||||
throw std::runtime_error("Error running glslangValidator command");
|
||||
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
||||
std::vector<char> buffer;
|
||||
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
||||
buffer.insert(
|
||||
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
||||
return { (uint32_t*)buffer.data(),
|
||||
(uint32_t*)(buffer.data() + buffer.size()) };
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue