-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmultiple_linear_regression.py
More file actions
47 lines (36 loc) · 1.61 KB
/
multiple_linear_regression.py
File metadata and controls
47 lines (36 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Description: Multiple Linear Regression
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# sklearn libraries
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
# Load the dataset and split it into the independent variables (every column
# except the last one) and the dependent variable (the last column).
df = pd.read_csv("50_Startups.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(X)

# Encode the categorical data (State column, index 3) as one-hot columns.
# ColumnTransformer and OneHotEncoder are already imported at the top of the
# file, so they are not imported again here.
# remainder="passthrough" keeps the other columns as they are; in the
# transformed matrix the one-hot columns come first, followed by the
# passed-through columns.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [3])], remainder="passthrough"
)
X = np.array(ct.fit_transform(X))

# Split the dataset into the training set (80%) and the test set (20%).
# random_state is fixed so the split is reproducible between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Train the multiple linear regression model on the training set.
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predict the test set results and print them next to the actual values
# (left column: prediction, right column: actual value).
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

# Make a single prediction (for example the profit of a startup with
# R&D Spend = 160000, Administration Spend = 130000, Marketing Spend = 300000
# and State = 'California').
# The raw record is given in the original column order and encoded with the
# already-fitted transformer instead of a hand-written one-hot vector, so the
# encoding always matches the one used for training. dtype=object keeps the
# numbers numeric next to the state string. A state that was not seen during
# fitting raises an error instead of being silently mis-encoded.
new_startup = np.array([[160000, 130000, 300000, "California"]], dtype=object)
print(regressor.predict(ct.transform(new_startup)))

# Print the coefficients and the intercept of the final linear regression
# equation. The coefficients follow the column order of the transformed
# matrix (one-hot State columns first, then the passed-through columns).
print(regressor.coef_)
print(regressor.intercept_)