Commit a25ad98

Create regression_models_multivariate_cv.cpp

1 parent 604bb21 commit a25ad98

File tree

regression_models_multivariate_cv.cpp

1 file changed: +266 −0 lines changed
@@ -0,0 +1,266 @@
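// Build sketch (the include path is an assumption; adjust to your Eigen install):
//   g++ -std=c++17 -O2 -I/usr/include/eigen3 regression_models_multivariate_cv.cpp -o regression_cv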
#include <iostream>
#include <vector>
#include <cmath>
#include <random>
#include <fstream>
#include <numeric>
#include <functional>  // std::function, used by cross_validate
#include <algorithm>
#include <Eigen/Dense>

using namespace std;
using namespace Eigen;
// Utility functions
double mean_squared_error(const VectorXd& y_true, const VectorXd& y_pred) {
    return (y_true - y_pred).squaredNorm() / y_true.size();
}

double r2_score(const VectorXd& y_true, const VectorXd& y_pred) {
    double mean_y = y_true.mean();
    double total = (y_true.array() - mean_y).square().sum();
    double residual = (y_true - y_pred).squaredNorm();
    return 1.0 - residual / total;
}
void save_csv(const string& filename, const MatrixXd& X, const VectorXd& y_true, const VectorXd& y_pred) {
    ofstream file(filename);
    // Write one header column per feature, then the true and predicted targets.
    for (int j = 0; j < X.cols(); ++j)
        file << "X" << j + 1 << ",";
    file << "True Y,Predicted Y\n";
    for (int i = 0; i < X.rows(); ++i) {
        for (int j = 0; j < X.cols(); ++j)
            file << X(i, j) << ",";
        file << y_true(i) << "," << y_pred(i) << "\n";
    }
    file.close();
}
// k-fold cross-validation: shuffle the sample indices, hold out one fold at a
// time for testing, fit on the remaining folds, and average MSE and R^2.
void cross_validate(const MatrixXd& X, const VectorXd& y, int k,
                    function<void(const MatrixXd&, const VectorXd&)> fit_func,
                    function<VectorXd(const MatrixXd&)> predict_func,
                    double& avg_mse, double& avg_r2) {
    int n = X.rows();
    vector<int> indices(n);
    iota(indices.begin(), indices.end(), 0);
    // std::random_shuffle was deprecated in C++14 and removed in C++17;
    // std::shuffle with an explicit engine is the portable replacement.
    mt19937 rng(random_device{}());
    shuffle(indices.begin(), indices.end(), rng);

    avg_mse = 0.0;
    avg_r2 = 0.0;
    for (int i = 0; i < k; ++i) {
        int start = i * n / k;
        int end = (i + 1) * n / k;

        vector<int> test_idx(indices.begin() + start, indices.begin() + end);
        vector<int> train_idx;
        for (int j = 0; j < n; ++j) {
            if (j < start || j >= end)
                train_idx.push_back(indices[j]);
        }

        MatrixXd X_train(train_idx.size(), X.cols());
        VectorXd y_train(train_idx.size());
        for (size_t j = 0; j < train_idx.size(); ++j) {
            X_train.row(j) = X.row(train_idx[j]);
            y_train(j) = y(train_idx[j]);
        }

        MatrixXd X_test(test_idx.size(), X.cols());
        VectorXd y_test(test_idx.size());
        for (size_t j = 0; j < test_idx.size(); ++j) {
            X_test.row(j) = X.row(test_idx[j]);
            y_test(j) = y(test_idx[j]);
        }

        fit_func(X_train, y_train);
        VectorXd y_pred = predict_func(X_test);
        avg_mse += mean_squared_error(y_test, y_pred);
        avg_r2 += r2_score(y_test, y_pred);
    }

    avg_mse /= k;
    avg_r2 /= k;
}
// Linear Regression: ordinary least squares via the normal equations.
class LinearRegression {
public:
    VectorXd weights;

    void fit(const MatrixXd& X, const VectorXd& y) {
        // Prepend a column of ones so weights(0) acts as the intercept.
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        weights = (X_bias.transpose() * X_bias).ldlt().solve(X_bias.transpose() * y);
    }

    VectorXd predict(const MatrixXd& X) const {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        return X_bias * weights;
    }
};
// Ridge Regression: least squares with an L2 penalty on the weights.
class RidgeRegression {
public:
    VectorXd weights;
    double alpha;

    RidgeRegression(double alpha = 1.0) : alpha(alpha) {}

    void fit(const MatrixXd& X, const VectorXd& y) {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        MatrixXd I = MatrixXd::Identity(X_bias.cols(), X_bias.cols());
        I(0, 0) = 0;  // leave the intercept unpenalized
        weights = (X_bias.transpose() * X_bias + alpha * I).ldlt().solve(X_bias.transpose() * y);
    }

    VectorXd predict(const MatrixXd& X) const {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        return X_bias * weights;
    }
};
// Kernel Ridge Regression with an RBF kernel.
class KernelRidgeRegression {
public:
    double alpha, gamma;
    MatrixXd X_train;
    VectorXd alpha_vec;

    KernelRidgeRegression(double alpha = 1.0, double gamma = 1.0) : alpha(alpha), gamma(gamma) {}

    // k(a, b) = exp(-gamma * ||a - b||^2)
    MatrixXd rbf_kernel(const MatrixXd& A, const MatrixXd& B) const {
        MatrixXd K(A.rows(), B.rows());
        for (int i = 0; i < A.rows(); ++i) {
            for (int j = 0; j < B.rows(); ++j) {
                K(i, j) = exp(-gamma * (A.row(i) - B.row(j)).squaredNorm());
            }
        }
        return K;
    }

    void fit(const MatrixXd& X, const VectorXd& y) {
        X_train = X;
        MatrixXd K = rbf_kernel(X, X);
        alpha_vec = (K + alpha * MatrixXd::Identity(K.rows(), K.cols())).ldlt().solve(y);
    }

    VectorXd predict(const MatrixXd& X) const {
        MatrixXd K = rbf_kernel(X, X_train);
        return K * alpha_vec;
    }
};
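// Sketch of the math implemented above: with kernel matrix K_ij = k(x_i, x_j),
// fit() solves (K + alpha * I) a = y for the dual coefficients a, and
// predict() evaluates f(x) = sum_i a_i * k(x, x_i) on the new points.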
// Lasso Regression: L1-penalized least squares solved by coordinate descent.
class LassoRegression {
public:
    VectorXd weights;
    double alpha;
    int max_iter;
    double tol;

    LassoRegression(double alpha = 0.1, int max_iter = 1000, double tol = 1e-4)
        : alpha(alpha), max_iter(max_iter), tol(tol) {}

    void fit(const MatrixXd& X, const VectorXd& y) {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        int n_samples = X_bias.rows();
        int n_features = X_bias.cols();
        weights = VectorXd::Zero(n_features);

        for (int iter = 0; iter < max_iter; ++iter) {
            VectorXd weights_old = weights;
            // Update one coordinate at a time, holding the others fixed.
            for (int j = 0; j < n_features; ++j) {
                double tmp = 0.0;
                for (int i = 0; i < n_samples; ++i) {
                    double dot = 0.0;
                    for (int k = 0; k < n_features; ++k) {
                        if (k != j)
                            dot += X_bias(i, k) * weights(k);
                    }
                    tmp += X_bias(i, j) * (y(i) - dot);
                }
                double rho = tmp;
                double norm_sq = X_bias.col(j).squaredNorm();

                if (j == 0) {
                    // The intercept is not penalized: plain least-squares step.
                    weights(j) = rho / norm_sq;
                } else {
                    // Soft-thresholding of rho at alpha / 2.
                    if (rho < -alpha / 2)
                        weights(j) = (rho + alpha / 2) / norm_sq;
                    else if (rho > alpha / 2)
                        weights(j) = (rho - alpha / 2) / norm_sq;
                    else
                        weights(j) = 0.0;
                }
            }
            // Stop once the weights change by less than tol in L1 norm.
            if ((weights - weights_old).lpNorm<1>() < tol)
                break;
        }
    }

    VectorXd predict(const MatrixXd& X) const {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        return X_bias * weights;
    }
};
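// Sketch of the coordinate-descent update above, for the unnormalized
// objective ||y - Xw||^2 + alpha * ||w||_1:
//   rho_j = x_j^T (y - sum_{k != j} x_k w_k)
//   w_j   = S(rho_j, alpha / 2) / ||x_j||^2,
// where S(r, t) = sign(r) * max(|r| - t, 0) is the soft-thresholding operator.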
int main() {
    // Generate multivariate synthetic data: y = 2 + x . [1, 2, 3] + noise.
    // (Eigen does not define scalar + matrix, so the intercept is added
    // via a constant vector.)
    int n_samples = 150;
    int n_features = 3;
    MatrixXd X = MatrixXd::Random(n_samples, n_features);
    VectorXd y = VectorXd::Constant(n_samples, 2.0)
               + X * VectorXd::LinSpaced(n_features, 1.0, 3.0)
               + 0.5 * VectorXd::Random(n_samples);

    LinearRegression linear;
    RidgeRegression ridge(1.0);
    LassoRegression lasso(0.1);
    KernelRidgeRegression kernel(1.0, 2.0);

    // Fit each model on the full data set and save its in-sample predictions.
    linear.fit(X, y);
    ridge.fit(X, y);
    lasso.fit(X, y);
    kernel.fit(X, y);

    VectorXd y_pred_linear = linear.predict(X);
    VectorXd y_pred_ridge = ridge.predict(X);
    VectorXd y_pred_lasso = lasso.predict(X);
    VectorXd y_pred_kernel = kernel.predict(X);

    save_csv("predictions_linear.csv", X, y, y_pred_linear);
    save_csv("predictions_ridge.csv", X, y, y_pred_ridge);
    save_csv("predictions_lasso.csv", X, y, y_pred_lasso);
    save_csv("predictions_kernel.csv", X, y, y_pred_kernel);

    cout << "Cross-validation results (5-fold):\n";
    double mse_avg, r2_avg;

    cross_validate(X, y, 5,
        [&](const MatrixXd& Xtr, const VectorXd& ytr){ linear.fit(Xtr, ytr); },
        [&](const MatrixXd& Xte){ return linear.predict(Xte); },
        mse_avg, r2_avg);
    cout << "Linear -> MSE: " << mse_avg << ", R2: " << r2_avg << endl;

    cross_validate(X, y, 5,
        [&](const MatrixXd& Xtr, const VectorXd& ytr){ ridge.fit(Xtr, ytr); },
        [&](const MatrixXd& Xte){ return ridge.predict(Xte); },
        mse_avg, r2_avg);
    cout << "Ridge -> MSE: " << mse_avg << ", R2: " << r2_avg << endl;

    cross_validate(X, y, 5,
        [&](const MatrixXd& Xtr, const VectorXd& ytr){ lasso.fit(Xtr, ytr); },
        [&](const MatrixXd& Xte){ return lasso.predict(Xte); },
        mse_avg, r2_avg);
    cout << "Lasso -> MSE: " << mse_avg << ", R2: " << r2_avg << endl;

    cross_validate(X, y, 5,
        [&](const MatrixXd& Xtr, const VectorXd& ytr){ kernel.fit(Xtr, ytr); },
        [&](const MatrixXd& Xte){ return kernel.predict(Xte); },
        mse_avg, r2_avg);
    cout << "Kernel -> MSE: " << mse_avg << ", R2: " << r2_avg << endl;

    return 0;
}
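Running the program writes predictions_linear.csv, predictions_ridge.csv, predictions_lasso.csv, and predictions_kernel.csv to the working directory and prints the averaged 5-fold MSE and R2 for each model; the exact numbers depend on the random data and fold shuffling.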
