131 changes: 76 additions & 55 deletions csrc/layers/convolutional_layer.h
@@ -9,8 +9,8 @@ class Conv2D : public Layer {
int in_channels;
int out_channels;
int kernel_size;
int stride;
int padding;
int stride = 1;
int padding = 0;
int seed = -1;
std::string technique = constant::HE;
std::string mode = constant::NORMAL;
@@ -29,7 +29,11 @@ class Conv2D : public Layer {

// Create the RandomNumberGenerator
RandomNumberGenerator rng(
this->technique, this->mode, this->in_channels, this->out_channels, seed_to_use);
this->technique,
this->mode,
this->in_channels,
this->out_channels,
seed_to_use);
for (int oc = 0; oc < out_channels; ++oc) {
for (int ic = 0; ic < in_channels; ++ic) {
for (int kh = 0; kh < kernel_size; ++kh) {
@@ -44,12 +48,18 @@ }
}

public:
Conv2D(int in_channels, int out_channels, int kernel_size)
: in_channels(in_channels),
out_channels(out_channels),
kernel_size(kernel_size) {
_initialize();
}
Conv2D(
int in_channels,
int out_channels,
int kernel_size,
int stride = 1,
int padding = 0)
int stride,
int padding)
: in_channels(in_channels),
out_channels(out_channels),
kernel_size(kernel_size),
@@ -90,39 +100,39 @@ class Conv2D : public Layer {
std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
override {
auto input_shape = input->shape; // [batch_size, in_channels, height, width]
int batch_size = input_shape[0];
int height = input_shape[2];
int width = input_shape[3];
// -- no batch for now
// int batch = input_shape[0];
int in_channel = input_shape[0];
int height = input_shape[1];
int width = input_shape[2];

// Compute output dimensions
int output_height = (height - kernel_size + 2 * padding) / stride + 1;
int output_width = (width - kernel_size + 2 * padding) / stride + 1;

// Output tensor
auto output = std::make_shared<Tensor>(std::vector<int>{
batch_size, out_channels, output_height, output_width});

for (int b = 0; b < batch_size; ++b) {
for (int oc = 0; oc < out_channels; ++oc) {
for (int oh = 0; oh < output_height; ++oh) {
for (int ow = 0; ow < output_width; ++ow) {
// Compute the dot product of the kernel and the input patch
double result = 0.0;
for (int ic = 0; ic < in_channels; ++ic) {
for (int kh = 0; kh < kernel_size; ++kh) {
for (int kw = 0; kw < kernel_size; ++kw) {
int ih = oh * stride + kh - padding;
int iw = ow * stride + kw - padding;
if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
result += input->get({b, ic, ih, iw})->data *
weights->get({oc, ic, kh, kw})->data;
}
auto output = std::make_shared<Tensor>(
std::vector<int>{out_channels, output_height, output_width});

for (int oc = 0; oc < out_channels; ++oc) {
for (int oh = 0; oh < output_height; ++oh) {
for (int ow = 0; ow < output_width; ++ow) {
// Compute the dot product of the kernel and the input patch
double result = 0.0;
for (int ic = 0; ic < in_channels; ++ic) {
for (int kh = 0; kh < kernel_size; ++kh) {
for (int kw = 0; kw < kernel_size; ++kw) {
int ih = oh * stride + kh - padding;
int iw = ow * stride + kw - padding;
if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
result += input->get({ic, ih, iw})->data *
weights->get({oc, ic, kh, kw})->data;
}
}
}
result += bias->get(oc)->data; // Add bias
output->set({b, oc, oh, ow}, std::make_shared<Value>(result));
}
result += bias->get(oc)->data; // Add bias
output->set({oc, oh, ow}, std::make_shared<Value>(result));
}
}
}
@@ -139,56 +149,67 @@ }
}

void zero_grad() override {
weights->zero_grad();
bias->zero_grad();
this->weights->zero_grad();
this->bias->zero_grad();
}

std::vector<std::shared_ptr<Value>> parameters() override {
std::vector<std::shared_ptr<Value>> out;
for (int i = 0; i <= this->weights->maxIdx; i++) {
out.push_back(this->weights->get(i));
}
for (int i = 0; i <= this->bias->maxIdx; i++) {
out.push_back(this->bias->get(i));
}
return out;
}
};

class MaxPooling2D : public Layer {
private:
int pool_size;
int stride;
int stride = 1;

public:
MaxPooling2D(int pool_size, int stride = 1)
MaxPooling2D(int pool_size) : pool_size(pool_size) {}
MaxPooling2D(int pool_size, int stride)
: pool_size(pool_size), stride(stride) {}

std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
override {
auto input_shape = input->shape; // [batch_size, channels, height, width]
int batch_size = input_shape[0];
int channels = input_shape[1];
int height = input_shape[2];
int width = input_shape[3];
auto input_shape = input->shape; // [batch_size, channels, height, width] --
// no batch for now
// int batch_size = input_shape[0];
int channels = input_shape[0];
int height = input_shape[1];
int width = input_shape[2];

// Compute output dimensions
int output_height = (height - pool_size) / stride + 1;
int output_width = (width - pool_size) / stride + 1;

// Output tensor
auto output = std::make_shared<Tensor>(
std::vector<int>{batch_size, channels, output_height, output_width});

for (int b = 0; b < batch_size; ++b) {
for (int c = 0; c < channels; ++c) {
for (int oh = 0; oh < output_height; ++oh) {
for (int ow = 0; ow < output_width; ++ow) {
std::shared_ptr<Value> max_val = std::make_shared<Value>(
-std::numeric_limits<double>::infinity());
for (int ph = 0; ph < pool_size; ++ph) {
for (int pw = 0; pw < pool_size; ++pw) {
int ih = oh * stride + ph;
int iw = ow * stride + pw;
if (ih < height && iw < width) {
std::shared_ptr<Value> curr_val = input->get({b, c, ih, iw});
if (max_val->data < curr_val->data) {
max_val = curr_val;
}
std::vector<int>{channels, output_height, output_width});

for (int c = 0; c < channels; ++c) {
for (int oh = 0; oh < output_height; ++oh) {
for (int ow = 0; ow < output_width; ++ow) {
std::shared_ptr<Value> max_val =
std::make_shared<Value>(-std::numeric_limits<double>::infinity());
for (int ph = 0; ph < pool_size; ++ph) {
for (int pw = 0; pw < pool_size; ++pw) {
int ih = oh * stride + ph;
int iw = ow * stride + pw;
if (ih < height && iw < width) {
std::shared_ptr<Value> curr_val = input->get({c, ih, iw});
if (max_val->data < curr_val->data) {
max_val = curr_val;
}
}
}
output->set({b, c, oh, ow}, max_val);
}
output->set({c, oh, ow}, max_val);
}
}
}
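
Note on the reshaped Conv2D/MaxPooling2D: both layers now operate on a single [channels, height, width] sample rather than a batched tensor, and the output spatial size follows the standard convolution arithmetic used in call() above. A minimal sketch of that formula (conv_out_dim is an illustrative helper, not part of the PR):

// Spatial output size as computed in Conv2D::call (integer division, as in the diff).
int conv_out_dim(int in_dim, int kernel_size, int padding, int stride) {
  return (in_dim - kernel_size + 2 * padding) / stride + 1;
}
// e.g. a 28x28 input with kernel_size=3, padding=0, stride=1 gives 26x26;
// MaxPooling2D(pool_size=2, stride=2) on that gives (26 - 2) / 2 + 1 = 13, i.e. 13x13.
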
19 changes: 19 additions & 0 deletions csrc/layers/flatten.h
@@ -0,0 +1,19 @@
#pragma once
#include <memory>
#include <string>
#include "../neural_network.h"
#include "../tensor.h"

class Flatten : public Layer {
public:
std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
override {
return input->flatten();
}

std::string printMe() override {
return "Flatten()";
}

void zero_grad() override {};
};
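
Flatten simply defers to Tensor::flatten() (added in csrc/tensor.h further down), collapsing whatever shape it receives into a rank-1 tensor. A rough usage sketch built only from the headers in this diff — the sizes are illustrative, and it assumes the input values are populated before calling the layers:

auto conv = std::make_shared<Conv2D>(1, 4, 3);        // 1 -> 4 channels, 3x3 kernel
auto pool = std::make_shared<MaxPooling2D>(2, 2);     // 2x2 window, stride 2
auto flat = std::make_shared<Flatten>();

auto x = std::make_shared<Tensor>(std::vector<int>{1, 28, 28});
for (int i = 0; i <= x->maxIdx; ++i)                  // populate with dummy values
  x->set(i, std::make_shared<Value>(0.5));

auto y = flat->call(pool->call(conv->call(x, false), false), false);
// x {1, 28, 28} -> conv {4, 26, 26} -> pool {4, 13, 13} -> flatten {676},
// ready to feed into a FeedForwardLayer.
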
30 changes: 0 additions & 30 deletions csrc/layers/non_linear_layer.h
@@ -14,11 +14,6 @@ class ReLu : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};

class GeLu : public Layer {
@@ -33,11 +28,6 @@ class GeLu : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};

class Tanh : public Layer {
@@ -52,11 +42,6 @@ class Tanh : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};

class Sigmoid : public Layer {
@@ -71,11 +56,6 @@ class Sigmoid : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};

class LeakyReLu : public Layer {
@@ -92,11 +72,6 @@ class LeakyReLu : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};

class SoftMax : public Layer {
@@ -111,9 +86,4 @@ class SoftMax : public Layer {
}

void zero_grad() override {};

std::vector<std::shared_ptr<Value>> parameters() override {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}
};
26 changes: 26 additions & 0 deletions csrc/main.cc
@@ -1,6 +1,8 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "layers/convolutional_layer.h"
#include "layers/feed_forward_layer.h"
#include "layers/flatten.h"
#include "layers/non_linear_layer.h"
#include "loss.h"
#include "neural_network.h"
@@ -145,6 +147,30 @@ PYBIND11_MODULE(_core, m) {
.def("__call__", &FeedForwardLayer::call)
.def("__repr__", &FeedForwardLayer::printMe);

py::class_<Conv2D, Layer, std::shared_ptr<Conv2D>>(m, "Conv2D")
.def(py::init<int, int, int>())
.def(py::init<int, int, int, int, int>())
.def(py::init<int, int, int, int, int, int, std::string, std::string>())
.def("zero_grad", &Conv2D::zero_grad)
.def("parameters", &Conv2D::parameters)
.def("__call__", &Conv2D::call)
.def("__repr__", &Conv2D::printMe);

py::class_<MaxPooling2D, Layer, std::shared_ptr<MaxPooling2D>>(
m, "MaxPooling2D")
.def(py::init<int>())
.def(py::init<int, int>())
.def("zero_grad", &MaxPooling2D::zero_grad)
.def("parameters", &MaxPooling2D::parameters)
.def("__call__", &MaxPooling2D::call)
.def("__repr__", &MaxPooling2D::printMe);

py::class_<Flatten, Layer, std::shared_ptr<Flatten>>(m, "Flatten")
.def("zero_grad", &Flatten::zero_grad)
.def("parameters", &Flatten::parameters)
.def("__call__", &Flatten::call)
.def("__repr__", &Flatten::printMe);

py::class_<ReLu, Layer, std::shared_ptr<ReLu>>(m, "ReLu")
.def(py::init<>())
.def("zero_grad", &ReLu::zero_grad)
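
The Conv2D binding registers three constructors. The first two match the overloads visible in convolutional_layer.h above; the eight-argument form is assumed (its C++ constructor is outside this diff) to additionally take seed, technique and mode. A hedged sketch of the equivalent C++ constructions:

Conv2D c1(1, 8, 3);        // kernel-only form; stride/padding use the member
                           // defaults (stride = 1, padding = 0)
Conv2D c2(1, 8, 3, 2, 1);  // explicit stride = 2, padding = 1
// Conv2D c3(1, 8, 3, 1, 0, 42, constant::HE, constant::NORMAL);
//   assumed argument order for py::init<int, int, int, int, int, int,
//   std::string, std::string>; the matching constructor is not shown here.
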
5 changes: 4 additions & 1 deletion csrc/neural_network.h
@@ -16,7 +16,10 @@ class Layer {

virtual std::string printMe() = 0;

virtual std::vector<std::shared_ptr<Value>> parameters() = 0;
virtual std::vector<std::shared_ptr<Value>> parameters() {
// no parameters
return std::vector<std::shared_ptr<Value>>{};
}

virtual void zero_grad() = 0;
};
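
Making parameters() a plain virtual with an empty default (instead of pure virtual) is what allows the boilerplate overrides to be deleted from non_linear_layer.h above: any layer without trainable weights simply inherits it. A minimal sketch of a parameter-free layer under the updated interface (MyActivation is illustrative, not part of the PR):

class MyActivation : public Layer {
 public:
  std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
      override {
    return input;  // identity, just to show the required interface
  }
  std::string printMe() override { return "MyActivation()"; }
  void zero_grad() override {}
  // parameters() is inherited and returns an empty vector of Value pointers.
};
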
12 changes: 11 additions & 1 deletion csrc/tensor.h
@@ -1,5 +1,4 @@
#pragma once
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
@@ -330,4 +329,15 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
std::string my_shape = "tensor of shape: " + tensor_shape_str();
return my_shape;
}

std::shared_ptr<Tensor> flatten() {
std::shared_ptr<Tensor> out =
std::make_shared<Tensor>(std::vector<int>{maxIdx + 1});
int i = 0;
for (auto& e : this->v) {
out->set(i, e);
i++;
}
return out;
}
};
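
flatten() copies the maxIdx + 1 stored value pointers into a fresh rank-1 tensor, so the element count is preserved while the shape collapses, and the flattened tensor shares the same underlying Value objects. For example (shapes are illustrative):

auto t = std::make_shared<Tensor>(std::vector<int>{2, 3, 4});  // 24 elements
auto f = t->flatten();                                         // shape {24}
// f->get(i) and the corresponding element of t point at the same Value,
// since set() stores the shared_ptr rather than copying the Value.
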
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "scikit_build_core.build"
name = "deeptensor"
version = "0.4.0" # new api
url = "https://github.com/deependujha/deeptensor"
description = "A minimal PyTorch-like **deep learning library** focused on custom autograd and efficient tensor operations."
description = "A minimal PyTorch-like deep learning library focused on custom autograd and efficient tensor operations."
readme = "README.md"
authors = [
{ name = "Deependu Jha", email = "deependujha21@gmail.com" },