Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ option(CMAKE_EXPORT_COMPILE_COMMANDS "Generate compile_commands.json" ON) # for
# Option to enable debug logging
option(ENABLE_DEBUG "Enable debug logging" OFF)

# Enable AddressSanitizer for Debug builds
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
    message(STATUS "Enabling AddressSanitizer (ASan)")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
    # NOTE: CMAKE_LINKER_FLAGS is not a variable CMake reads; the ASan runtime
    # must be passed to the actual link steps or linking fails with undefined
    # sanitizer symbols. Use the executable and shared-library linker flags.
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
endif()

# Add a preprocessor definition based on the ENABLE_DEBUG option
if (ENABLE_DEBUG)
add_compile_definitions(DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion csrc/layers/feed_forward_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class FeedForwardLayer : public Layer {
}

// Human-readable summary of this layer, e.g. "FeedForwardLayer(4,2)".
// (The diff rendering left both the old "Layer(..." line and the new
// "FeedForwardLayer(..." line in place; only the new one is kept here.)
std::string printMe() override {
  std::string s = "FeedForwardLayer(" + std::to_string(this->nin) + "," +
                  std::to_string(this->nout) + ")";
  return s;
}
Expand Down
72 changes: 54 additions & 18 deletions csrc/loss.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "loss.h"
#include <memory>
#include <stdexcept>
#include <string>
#include "value.h"

std::shared_ptr<Value> mean_squared_error(
Expand All @@ -25,26 +26,61 @@ std::shared_ptr<Value> mean_squared_error(
return out->div(n + 1);
}

std::shared_ptr<Value> cross_entropy(std::shared_ptr<Tensor> logits, std::shared_ptr<Tensor> actual){
if (logits->shape != actual->shape) {
std::string x_shape_str = logits->tensor_shape_str();
std::string y_shape_str = actual->tensor_shape_str();
std::string error_string =
"Shapes of the two tensors for computing cross_entropy don't match: tensor-1 shape (" +
x_shape_str + ") vs tensor-1 shape(" + y_shape_str + ")\n";
throw std::runtime_error(error_string);
// Cross-entropy loss for a single sample: -ln(softmax(logits)[actualIdx]).
// `logits` must be a 1-D tensor of raw (unnormalized) scores; `actualIdx` is
// the index of the true class. The result is NOT averaged over a batch.
// Throws std::runtime_error on a negative/out-of-range index or non-1-D input.
std::shared_ptr<Value> cross_entropy(
    std::shared_ptr<Tensor> logits,
    int actualIdx) {
  if (actualIdx < 0) {
    throw std::runtime_error(
        "actualIdx can't be smaller than 0. Got: " +
        std::to_string(actualIdx));
  }
  // Valid indices are [0, shape[0] - 1]. The previous check used `<` and
  // wrongly accepted actualIdx == shape[0], one past the end.
  if (logits->shape.size() != 1 || logits->shape[0] <= actualIdx) {
    throw std::runtime_error(
        "logits must be a one-dimensional tensor and actualIdx must be "
        "smaller than the logits size. Got: logits shape => " +
        logits->tensor_shape_str() +
        ", and actualIdx: " + std::to_string(actualIdx));
  }
  // Softmax turns the raw scores into probabilities.
  std::shared_ptr<Tensor> logits_softmax = logits->softmax();

  // Negative log-likelihood of the true class.
  std::shared_ptr<Value> logits_ln = logits_softmax->get(actualIdx)->ln();

  return logits_ln->mul(-1); // not averaging it
}

// Binary cross-entropy for a single scalar prediction.
// `logits` must be a 1-D tensor whose first element is a probability in
// [0, 1]; `actualIdx` is the true label (0 or 1). Returns -ln(p) for label 1
// and -ln(1 - p) for label 0 (not averaged over a batch).
// (This span also contained deleted lines of the old loop-based
// cross_entropy; only the merged binary_cross_entropy is kept here.)
std::shared_ptr<Value> binary_cross_entropy(
    std::shared_ptr<Tensor> logits,
    int actualIdx) {
  if (actualIdx < 0 || actualIdx > 1) {
    throw std::runtime_error(
        "Expected Idx can't be smaller than 0 or greater than 1. Got: " +
        std::to_string(actualIdx));
  }
  if (logits->shape.size() != 1) {
    throw std::runtime_error(
        "logits must be a one-dimensional tensor. Got: logits shape =>" +
        logits->tensor_shape_str());
  }
  std::shared_ptr<Value> logit_value = logits->get(0);

  // For label 0 the loss is -ln(1 - p), so flip the probability.
  std::shared_ptr<Value> updated_logit_value = logit_value;
  if (actualIdx == 0) {
    updated_logit_value = std::make_shared<Value>(1.0)->sub(logit_value);
  }

  if (updated_logit_value->data < 0 || updated_logit_value->data > 1) {
    // Report the value that actually failed the range check (the original
    // message printed the un-flipped logit_value instead).
    throw std::runtime_error(
        "logit value can't be less than 0, and more than 1. Got: " +
        std::to_string(updated_logit_value->data));
  }

  // Clamp away from exactly 0 and 1 so ln() stays finite.
  constexpr double EPSILON = 1e-6;
  if (updated_logit_value->data <= 0.0) {
    updated_logit_value->data = EPSILON; // Handle near-zero values
  } else if (updated_logit_value->data >= 1.0) {
    updated_logit_value->data = 1.0 - EPSILON; // Handle near-one values
  }

  std::shared_ptr<Value> logits_ln = updated_logit_value->ln();
  return logits_ln->mul(-1);
}
16 changes: 12 additions & 4 deletions csrc/loss.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#pragma once
#include <memory>
#include "tensor.h"
#include "value.h"

// NOTE: the diff view left the pre-change declarations (Tensor-actual
// cross_entropy, old include order) interleaved with the new ones; only the
// merged declarations are kept here.

// Mean of the squared element-wise differences between x and y.
std::shared_ptr<Value> mean_squared_error(
    std::shared_ptr<Tensor> x,
    std::shared_ptr<Tensor> y);

// -ln(softmax(logits)[actualIdx]) for a 1-D logits tensor; actualIdx is the
// index of the true class.
std::shared_ptr<Value> cross_entropy(
    std::shared_ptr<Tensor> logits,
    int actualIdx);

// Binary cross-entropy for a 1-D single-probability tensor; actualIdx is the
// true label (0 or 1).
std::shared_ptr<Value> binary_cross_entropy(
    std::shared_ptr<Tensor> logits,
    int actualIdx);
13 changes: 11 additions & 2 deletions csrc/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,16 @@ PYBIND11_MODULE(_core, m) {
"get",
static_cast<std::shared_ptr<Value> (Tensor::*)(std::vector<int>)>(
&Tensor::get))
.def_readonly("shape", &Tensor::shape)
.def_readonly("strides", &Tensor::strides)
.def_readonly("maxIdx", &Tensor::maxIdx)
.def_readonly("minIdx", &Tensor::minIdx)
.def_readonly("vals", &Tensor::v)
.def("normalize_idx", &Tensor::normalize_idx)
.def("backward", &Tensor::backward)
.def("zero_grad", &Tensor::zero_grad)
.def("add", &Tensor::add)
.def("div", &Tensor::div)
.def("__add__", &Tensor::add)
.def("__truediv__", &Tensor::div)
.def("matmul", &Tensor::matmul)
.def("relu", &Tensor::relu)
.def("gelu", &Tensor::gelu)
Expand Down Expand Up @@ -241,4 +246,8 @@ PYBIND11_MODULE(_core, m) {
"cross_entropy",
&cross_entropy,
"A function that value object with cross_entropy applied");
m.def(
"binary_cross_entropy",
&binary_cross_entropy,
"A function that value object with cross_entropy applied");
}
51 changes: 35 additions & 16 deletions csrc/tensor.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
Expand All @@ -10,8 +11,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
std::vector<int> shape;
std::vector<int> strides; // jump each index needs to make
std::vector<std::shared_ptr<Value>> v;
int maxIdx;
int minIdx;
int maxIdx = 0;
int minIdx = 0;

Tensor(std::vector<int> shape) : shape(std::move(shape)) {
int total_size = 1;
Expand All @@ -20,18 +21,23 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
}
v.resize(total_size);

strides.resize(this->shape.size());
strides.back() = 1;
this->compute_stride();
}

// Recompute `strides` and the inclusive flat-index range [minIdx, maxIdx]
// from the current `shape`. Must be called whenever `shape` changes
// (construction, reshape, matmul promotion).
void compute_stride() {
  this->strides.clear();
  this->strides.resize(this->shape.size());
  // Guard: back() on an empty vector is undefined behavior (0-d tensor).
  if (!this->strides.empty()) {
    this->strides.back() = 1;
    // Row-major layout: each stride is the product of all later dimensions.
    for (int i = int(this->shape.size()) - 2; i >= 0; --i) {
      this->strides[i] = this->strides[i + 1] * this->shape[i + 1];
    }
  }

  this->minIdx = 0;
  this->maxIdx = 1;
  for (auto& e : this->shape) {
    this->maxIdx *= e;
  }
  this->maxIdx--; // inclusive upper bound: one less than the element count
}

std::string tensor_shape_str() {
Expand All @@ -46,7 +52,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
void set(std::vector<int> idx, std::shared_ptr<Value> _v) {
int original_idx = normalize_idx(idx);
if ((original_idx < this->minIdx) || (original_idx > this->maxIdx)) {
std::string error_msg = "Index must be in the range. Limit (" +
std::string error_msg =
"Tensor set method: Index must be in the range. Limit (" +
std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
"), but found: " + std::to_string(original_idx) + ".";

Expand All @@ -58,7 +65,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
std::shared_ptr<Value> get(std::vector<int> idx) {
int original_idx = normalize_idx(idx);
if ((original_idx < this->minIdx) || (original_idx > this->maxIdx)) {
std::string error_msg = "Index must be in the range. Limit (" +
std::string error_msg =
"Tensor get method: Index must be in the range. Limit (" +
std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
"), but found: " + std::to_string(original_idx) + ".";

Expand All @@ -70,7 +78,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
// real index
void set(int idx, std::shared_ptr<Value> _v) {
if ((idx < this->minIdx) || (idx > this->maxIdx)) {
std::string error_msg = "Index must be in the range. Limit (" +
std::string error_msg =
"Tensor set method: Index must be in the range. Limit (" +
std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
"), but found: " + std::to_string(idx) + ".";

Expand All @@ -82,7 +91,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
// real index
std::shared_ptr<Value> get(int idx) {
if ((idx < this->minIdx) || (idx > this->maxIdx)) {
std::string error_msg = "Index must be in the range. Limit (" +
std::string error_msg =
"Tensor get method: Index must be in the range. Limit (" +
std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
"), but found: " + std::to_string(idx) + ".";

Expand Down Expand Up @@ -122,6 +132,7 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
new_shape.push_back(1);
}
t->shape = new_shape;
t->compute_stride();
}

std::shared_ptr<Tensor> add(std::shared_ptr<Tensor> other) {
Expand Down Expand Up @@ -174,23 +185,31 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
throw std::runtime_error("Cannot perform matmul with a null tensor.");
}

if (this->shape.size() > 2 || other->shape.size() > 2) {
throw std::runtime_error("For now, only 2-D matmul is allowed");
}

// Determine effective shapes
std::vector<int> this_shape = this->shape;
std::vector<int> other_shape = other->shape;

// Reshape if either is a vector (1D tensor)
if (this_shape.size() == 1) {
this_shape.insert(this_shape.begin(), 1); // Treat as row vector
std::vector<int> new_shape = {1, this_shape[0]};
this->shape = new_shape;
this->compute_stride();
this_shape = new_shape;
}
if (other_shape.size() == 1) {
other_shape.push_back(1); // Treat as column vector
other->shape.push_back(1);
// other_shape.push_back(1); // Treat as column vector
// other->shape.push_back(1);
// this->recompute_stride();

throw std::runtime_error("other tensor can't be 1D for matmul.");
}

// Validate dimensions for matrix multiplication
if (this_shape[1] != other_shape[0]) {
if (this->shape[1] != other_shape[0]) {
throw std::runtime_error(
"Dimensions do not align for matmul. Got shapes: (" +
std::to_string(this_shape[0]) + ", " + std::to_string(this_shape[1]) +
Expand All @@ -199,8 +218,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
}

// Compute output shape
std::vector<int> output_shape = {this_shape[0], other_shape[1]};
auto out = std::make_shared<Tensor>(output_shape);
std::vector<int> output_shape = {this_shape[0], other->shape[1]};
std::shared_ptr<Tensor> out = std::make_shared<Tensor>(output_shape);

// Perform matrix multiplication
for (int i = 0; i < output_shape[0]; i++) {
Expand Down
5 changes: 2 additions & 3 deletions csrc/value.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "value.h"
#include <cassert>
#include <cmath>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
Expand Down Expand Up @@ -43,9 +42,9 @@ void Value::backward() {
this->grad = 1.0;

// Iterating the vector in reverse order
std::cout << "topo list: \n";
// std::cout << "topo list: \n";
for (int i = int(topo_list.size()) - 1; i >= 0; i--) {
std::cout << "i: " << i << "; node: " << topo_list[i]->printMe() << "\n";
// std::cout << "i: " << i << "; node: " << topo_list[i]->printMe() << "\n";
topo_list[i]->executeBackWardMethod();
}
}
Expand Down
Loading
Loading