-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMachineLearning_OOP_Python.py
More file actions
127 lines (100 loc) · 4.45 KB
/
MachineLearning_OOP_Python.py
File metadata and controls
127 lines (100 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import pickle
class DataRetrive:
filepath = "E:\\Python\Datasets\\weather_training.csv" # class variable
featute_col = ['outlook', 'temperature', 'humidity', 'windy']
class_col = "play"
x_train = None
x_test = None
y_train = None
y_test = None
def __init__(self, localPath, featute_col, class_col):
self.localPath = localPath
self.featute_col = featute_col
self.class_col = class_col
@classmethod
def getpath(cls):
return cls.filepath
@classmethod
def getfeatutecol(cls):
return cls.featute_col
@classmethod
def getclasscol(cls):
return cls.class_col
def getdata(self):
return pd.read_csv(self.filepath)
def datasplit(self, data):
x_train, x_test, y_train, y_test = train_test_split(data[self.featute_col], data[self.class_col],
train_size=0.75, random_state=0)
self.x_train, self.x_test, self.y_train, self.y_test = x_train, x_test, y_train, y_test
return x_train, x_test, y_train, y_test
def getdatatype(self, data, col=None):
return col is None and data.dtypes or data[col].dtype
class DataPreprocessing:
class_col = ["yes", "no"]
mode_file_name = 'finalized_model.sav'
custom_model_file_name = 'custom_model.sav'
def __init__(self, x_train, x_test):
self.x_train = x_train
self.x_test = x_test
@classmethod
def getclassvalue(cls):
return cls.class_col
@classmethod
def getfilename(cls):
return cls.mode_file_name
@classmethod
def getcustomfilename(cls):
return cls.custom_model_file_name
def labelencoding(self, col, dtype):
le = LabelEncoder()
le.fit(self.x_train[col].astype(dtype))
x_train[col] = le.transform(self.x_train[col].astype(dtype))
x_test[col] = le.transform(self.x_test[col].astype(dtype))
return x_train, x_test
class ClassificationPerformation(DataPreprocessing):
def __init__(self, x_train, x_test, y_train, y_test):
self.y_train = y_train
self.y_test = y_test
super().__init__(x_train, x_test)
def fitclassifier(self):
clf = tree.DecisionTreeClassifier()
# clf = RandomForestClassifier()
clf = clf.fit(self.x_train, self.y_train)
return clf
def saveclassifier(self, clf, filename):
pickle.dump(clf, open(filename, 'wb'))
def retrivesavefile(self, filename):
clf = pickle.load(open(filename, 'rb'))
return clf
def showperformance(self, clf, label):
print('Accuracy of the training set: {:.2f}'.format(clf.score(self.x_train, self.y_train) * 100) + ' %')
print('Accuracy of the test set: {:.2f}'.format(clf.score(self.x_test, self.y_test) * 100) + ' %')
predicted = clf.predict(self.x_test)
confusion = confusion_matrix(self.y_test.to_numpy(), predicted, labels=label)
print(confusion)
print(classification_report(y_test, predicted))
# call class
dataRetrive = DataRetrive(DataRetrive.getpath(), DataRetrive.getfeatutecol(), DataRetrive.getclasscol())
filedata = dataRetrive.getdata()
x_train, x_test, y_train, y_test = dataRetrive.datasplit(filedata)
classificationPerformation = ClassificationPerformation(x_train, x_test, y_train, y_test)
# custom model
classificationPerformation.saveclassifier(dataRetrive, classificationPerformation.getcustomfilename())
custom_model = classificationPerformation.retrivesavefile(classificationPerformation.getcustomfilename())
#
# test model
x_train, X_test = classificationPerformation.labelencoding('outlook', str)
clf = classificationPerformation.fitclassifier()
classificationPerformation.saveclassifier(clf, classificationPerformation.getfilename())
fitted_model = classificationPerformation.retrivesavefile(classificationPerformation.getfilename())
classificationPerformation.showperformance(fitted_model, classificationPerformation.getclassvalue())
#