layer.py
import common.util as util
import common.initializer as init
import numpy as np


class conv2d:
    def __init__(self, filters, kernel_size, strides, w_init=init.xavier, b_init=0):
        self.filters = filters
        self.kernel_size = np.array(kernel_size)
        self.strides = np.array(strides)
        self.w_init = w_init
        self.x_shape = None    # [N, H, W, C]
        self.out_shape = None  # [out_H, out_W]
        self.pad = None
        self.col = None
        self.db = None
        self.dw = None
        self.w = None
        self.b = np.full(filters, b_init).astype(np.float32)

    def forward(self, x):
        N, H, W, C = x.shape
        self.x_shape = np.array([N, H, W, C])
        # Lazy weight init: the input channel count C is only known at the first forward pass.
        if self.w is None:
            self.w = self.w_init([self.filters, C, *self.kernel_size], np.prod(self.kernel_size)*C)  # [filters, C, FH, FW]
        out_shape = np.ceil(np.array([H, W]) / self.strides).astype(np.int32)
        self.out_shape = out_shape
        # 'same'-style padding: pad just enough so that out_shape == ceil([H, W] / strides).
        pad = (out_shape * self.strides) - np.array([H, W]) + self.kernel_size - self.strides
        pad[pad < 0] = 0  # max(0, pad)
        self.pad = pad
        FH, FW = self.kernel_size
        col = util.im2col(x, self.kernel_size, self.strides, pad, out_shape)  # [N*out_H*out_W, FH*FW*C]
        self.col = col
        w = self.w.reshape(self.filters, FH*FW*C).T     # [FH*FW*C, filters]
        out = np.dot(col, w) + self.b                   # [N*out_H*out_W, filters]
        out = out.reshape(N, *out_shape, self.filters)  # [N, out_H, out_W, filters]
        return out

    def backward(self, grad):
        # If grad is 2-D it came from a fully connected layer and would have shape
        # [N, out_H*out_W*filters]; reshape it back to [N, out_H, out_W, filters] first.
        # if grad.ndim == 2:
        #     grad = grad.reshape(self.x_shape[0], *self.out_shape, self.filters)
        N, out_H, out_W, filters = grad.shape
        FH, FW = self.kernel_size
        grad = grad.reshape(N*out_H*out_W, filters)  # [N*out_H*out_W, filters]
        w = self.w.reshape(filters, -1).T            # [FH*FW*C, filters]
        # self.w:   [filters, C, FH, FW]
        # self.col: [N*out_H*out_W, FH*FW*C]
        # origin_x: [N, H, W, C]
        self.db = np.sum(grad, axis=0)  # [filters]
        dw = np.dot(self.col.T, grad)   # [FH*FW*C, filters] <= [FH*FW*C, N*out_H*out_W] x [N*out_H*out_W, filters]
        dw = dw.T                       # [filters, FH*FW*C]; transposed so it can be reshaped to match self.w
        self.dw = dw.reshape(filters, -1, FH, FW)  # [filters, C, FH, FW]
        dx = np.dot(grad, w.T)          # [N*out_H*out_W, FH*FW*C] <= [N*out_H*out_W, filters] x [filters, FH*FW*C]
        dx = util.col2im(dx, self.x_shape, self.kernel_size, self.strides, self.pad, self.out_shape)
        return dx  # same shape as the x passed to forward: [N, H, W, C]
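
# Usage sketch (illustrative only, not part of the original file; the results rely on
# common.util.im2col/col2im and the xavier initializer behaving as the shape comments above describe):
#   conv = conv2d(filters=16, kernel_size=(3, 3), strides=(1, 1))
#   out = conv.forward(np.random.randn(8, 28, 28, 1))  # -> [8, 28, 28, 16] with 'same'-style padding
#   dx = conv.backward(np.ones_like(out))              # -> [8, 28, 28, 1]; conv.dw / conv.db hold the parameter grads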


class maxpool2d:
    def __init__(self, kernel_size, strides):
        self.kernel_size = np.array(kernel_size)
        self.strides = np.array(strides)
        self.x_shape = None    # [N, H, W, C]
        self.out_shape = None  # [out_H, out_W]
        self.pad = None
        self.max_mask = None

    def forward(self, x):
        N, H, W, C = x.shape
        self.x_shape = np.array([N, H, W, C])
        out_shape = np.ceil(np.array([H, W]) / self.strides).astype(np.int32)
        self.out_shape = out_shape
        pad = (out_shape * self.strides) - np.array([H, W]) + self.kernel_size - self.strides
        pad[pad < 0] = 0  # max(0, pad)
        self.pad = pad
        FH, FW = self.kernel_size
        col = util.im2col(x, self.kernel_size, self.strides, pad, out_shape)  # [N*out_H*out_W, FH*FW*C]
        col = col.reshape(N*np.prod(out_shape)*C, FH*FW)  # [N*out_H*out_W*C, FH*FW]
        max_mask = np.argmax(col, axis=1)        # [N*out_H*out_W*C]
        self.max_mask = np.eye(FH*FW)[max_mask]  # one-hot mask of each window's max position, [N*out_H*out_W*C, FH*FW]
        max_col = np.max(col, axis=1)            # [N*out_H*out_W*C]
        max_col = max_col.reshape(N, *out_shape, C)  # [N, out_H, out_W, C]
        return max_col

    def backward(self, grad):
        # If grad is 2-D it came from a fully connected layer and would have shape
        # [N, out_H*out_W*C]; reshape it back to [N, out_H, out_W, C] first.
        # if grad.ndim == 2:
        #     grad = grad.reshape(self.x_shape[0], *self.out_shape, -1)
        N, out_H, out_W, C = grad.shape
        FH, FW = self.kernel_size
        grad = grad.reshape(N*out_H*out_W*C, 1)  # [N*out_H*out_W*C, 1]
        col = self.max_mask * grad  # route each gradient only to its window's max position, [N*out_H*out_W*C, FH*FW]
        col = col.reshape(N*out_H*out_W, FH*FW*C)  # [N*out_H*out_W, FH*FW*C]
        x = util.col2im(col, self.x_shape, self.kernel_size, self.strides, self.pad, self.out_shape)
        return x  # same shape as the x passed to forward: [N, H, W, C]
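
# Usage sketch (illustrative only, not part of the original file; assumes the im2col row layout
# matches the reshape in forward above):
#   pool = maxpool2d(kernel_size=(2, 2), strides=(2, 2))
#   out = pool.forward(np.random.randn(8, 28, 28, 16))  # -> [8, 14, 14, 16]
#   dx = pool.backward(np.ones_like(out))               # gradient routed only to each window's max position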


class flatten:
    def __init__(self):
        self.x_shape = None

    def forward(self, x):
        self.x_shape = x.shape  # [N, ...]
        x = x.reshape(self.x_shape[0], -1)
        return x

    def backward(self, grad):
        grad = grad.reshape(self.x_shape)
        return grad


class dropout:
    def __init__(self, keep_prob):
        self.keep_prob = keep_prob
        self.mask = None

    def forward(self, x, is_train=True):
        if is_train:
            uniform = np.random.uniform(0, 1, size=x.shape)  # [0, 1)
            mask = uniform > self.keep_prob  # False below keep_prob, True above it
            # e.g. with keep_prob=0.6, values below 0.6 (~60%) are False and values above 0.6 (~40%) are True,
            # so mask marks the units to drop with True.
            self.mask = mask
            x[mask] = 0  # zero out (drop) the positions marked True
            return x
        else:
            self.mask = np.full(x.shape, False)
            return x

    def backward(self, grad):
        grad[self.mask] = 0  # dropped positions have zero derivative, so their gradient is also zero
        return grad
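
# Usage sketch (illustrative only, not part of the original file): with keep_prob=0.6 roughly 40%
# of activations are zeroed during training; pass is_train=False at inference to disable dropping.
#   drop = dropout(keep_prob=0.6)
#   out = drop.forward(np.random.randn(8, 128), is_train=True)   # ~40% of entries set to 0
#   out = drop.forward(np.random.randn(8, 128), is_train=False)  # unchanged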


class affine:
    def __init__(self, out_dim, w_init=init.xavier, b_init=0):
        self.out_dim = out_dim
        self.w_init = w_init
        self.b = np.full(out_dim, b_init).astype(np.float32)  # bias
        self.x = None   # input
        self.w = None
        self.dw = None  # w gradient
        self.db = None  # bias gradient

    def forward(self, x):
        self.x = x  # input
        in_dim = x.shape[-1]
        # Lazy weight init: the input dimension is only known at the first forward pass.
        if self.w is None:
            self.w = self.w_init([in_dim, self.out_dim], in_dim)  # [in_dim, out_dim]
        out = np.matmul(x, self.w) + self.b
        return out

    def backward(self, grad=1):
        # x.T is [in_dim, batch] and grad is [batch, out_dim], so np.matmul(x.T, grad)
        # accumulates dw over the whole batch; the shapes force this ordering
        # (drawing the computation graph makes it clear).
        self.dw = np.matmul(self.x.T, grad)
        self.db = np.mean(grad, axis=0)  # mean over the batch
        # Backpropagation continues with respect to x, so return the gradient w.r.t. x
        # and propagate it to the previous layer.
        dx = np.matmul(grad, self.w.T)
        return dx
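
# Usage sketch (illustrative only, not part of the original file; weights are created lazily on
# the first forward call via the xavier initializer imported above):
#   fc = affine(out_dim=10)
#   out = fc.forward(np.random.randn(8, 128))  # -> [8, 10]
#   dx = fc.backward(np.ones_like(out))        # -> [8, 128]; fc.dw / fc.db hold the parameter grads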


class relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        mask = (x <= 0)   # mask values <= 0, e.g. x = [10, -1, 3] => mask = [False, True, False]
        self.mask = mask  # in backward, the masked positions (values <= 0) get derivative 0
        x[mask] = 0       # set values <= 0 to 0
        return x

    def backward(self, grad):
        grad[self.mask] = 0  # positions whose forward value was <= 0 get gradient 0
        return grad


class sigmoid:
    def __init__(self):
        self.sigvalue = None

    def forward(self, x):
        sigvalue = 1 / (1 + np.exp(-x))
        self.sigvalue = sigvalue
        return sigvalue

    def backward(self, grad=1):
        # d(sigmoid)/dx = sigmoid * (1 - sigmoid), using the value cached in forward
        return grad * self.sigvalue * (1 - self.sigvalue)


class softmax_cross_entropy_with_logits:
    def __init__(self):
        self.target = None
        self.pred = None  # softmax output
        self.loss = None

    def forward(self, x, target):
        target = np.array(target)
        self.target = target
        # softmax
        max_value = np.max(x, axis=1, keepdims=True)
        exp = np.exp(x - max_value)  # subtracting the row max leaves the result unchanged but prevents overflow in exp
        pred = exp / np.sum(exp, axis=1, keepdims=True)
        self.pred = pred
        # cross entropy
        epsilon = 1e-07
        loss = -target * np.log(pred + epsilon)  # epsilon avoids log(0) = -inf when pred is 0
        loss = np.mean(np.sum(loss, axis=1), axis=0)  # sum per example, then mean over the batch
        self.loss = loss
        return loss

    def backward(self, grad=1):
        # return np.mean(self.pred - self.target, axis=0)  # average the gradient over the batch
        # Divide by the batch size here; it could also be done in affine.backward instead,
        # but then every affine layer would have to divide, which costs more computation.
        return (self.pred - self.target) / self.target.shape[0]
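

# A minimal smoke test, sketched under the assumption that common.util.im2col/col2im and
# common.initializer.xavier behave as the shape comments above describe. It is not part of the
# original layers; it only checks that a tiny conv -> relu -> pool -> flatten -> affine -> softmax
# stack runs forward and backward with consistent shapes (the sizes below are arbitrary).
if __name__ == "__main__":
    np.random.seed(0)
    x = np.random.randn(2, 8, 8, 3).astype(np.float32)     # [N, H, W, C]
    target = np.eye(10)[np.random.randint(0, 10, size=2)]  # one-hot labels, [N, 10]

    conv = conv2d(filters=4, kernel_size=(3, 3), strides=(1, 1))
    act = relu()
    pool = maxpool2d(kernel_size=(2, 2), strides=(2, 2))
    flat = flatten()
    fc = affine(out_dim=10)
    criterion = softmax_cross_entropy_with_logits()

    # forward pass
    out = conv.forward(x)     # expected [2, 8, 8, 4]
    out = act.forward(out)
    out = pool.forward(out)   # expected [2, 4, 4, 4]
    out = flat.forward(out)   # expected [2, 64]
    logits = fc.forward(out)  # expected [2, 10]
    loss = criterion.forward(logits, target)

    # backward pass, in reverse order
    grad = criterion.backward()
    grad = fc.backward(grad)
    grad = flat.backward(grad)
    grad = pool.backward(grad)
    grad = act.backward(grad)
    grad = conv.backward(grad)

    print("loss:", loss)
    print("dx shape:", grad.shape)  # expected to match x.shape: (2, 8, 8, 3)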