-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdata_generator.py
More file actions
161 lines (109 loc) · 5.04 KB
/
data_generator.py
File metadata and controls
161 lines (109 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
''' Create generators from dataset '''
import numpy as np
import cv2
import random
HIGH_DIM = 512
GLLIM_K = 1
BATCH_SIZE = 128
# Mode for the validation set for our mixture model
def load_data_generator_List(rootpath, imIn, file_test, validation=1.0,subsampling=1.0,processingTarget=None,transform=[],outSize=(224,224),batch_size=BATCH_SIZE,shuffle=False):
''' create generators from data'''
def generator(rootpath, images):
N=len(images)
nbatches=N/batch_size+1
if N%batch_size==0:
nbatches-=1
if shuffle:
random.shuffle(images)
i=0
while 1:
X, Y = get_xy_from_file(rootpath, images[i*batch_size:(i+1)*batch_size],processingTarget=processingTarget,transform=transform,outSize=outSize)
yield(X, Y)
i=i+1
if i>=nbatches: # we shuffle the data when the end of the dataset is reached
i=0
random.shuffle(images)
imTest = open(rootpath+file_test, 'r').readlines()
gen_test = generator(rootpath, imTest)
test_size=len(imTest)
# we subsample the data if needed
if subsampling!=1.0:
im=imIn[0:int(subsampling*len(imIn))][:]
else:
im=imIn[:]
if validation!=1.0: # if we use a validation set
Ntot=len(im)
training_size = int(validation*len(im))
val_size = Ntot-training_size
gen_train = generator(rootpath, im[:training_size])
gen_val = generator(rootpath, im[training_size:])
return (gen_train,training_size),(gen_val,val_size), (gen_test,test_size)
else: # without validation set
gen_train = generator(rootpath, im)
training_size = len(im)
return (gen_train,training_size), (gen_test,test_size)
def load_data_generator(rootpath, file_train, file_test, validation=1.0,subsampling=1.0,processingTarget=None,transform=[],outSize=(224,224),batch_size=BATCH_SIZE,shuffle=False):
im = open(rootpath+file_train, 'r').readlines()
return load_data_generator_List(rootpath, im[:], file_test, validation,subsampling,processingTarget=processingTarget,transform=transform,outSize=outSize,batch_size=batch_size,shuffle=shuffle)
def load_data_generator_List_simple(rootpath, imIn,transform=[],outSize=(224,224),batch_size=BATCH_SIZE,processingTarget=None,sample_weights=None):
''' create generators from data'''
def generator(rootpath, images):
N=len(images)
nbatches=N/batch_size+1
if N%batch_size==0:
nbatches-=1
i=0
if sample_weights is not None:
rn= sample_weights[:]
while 1:
X, Y = get_xy_from_file(rootpath, images[i*batch_size:(i+1)*batch_size],processingTarget=processingTarget,transform=transform,outSize=outSize)
if sample_weights is None:
yield(X, Y)
else:
yield(X, Y,rn[i*batch_size:(i+1)*batch_size])
i=i+1
if i>=nbatches: # we shuffle the data when the end of the dataset is reached
i=0
if sample_weights is None:
random.shuffle(images)
else:
c = zip(images,rn)
np.random.shuffle(c)
images = np.asarray([e[0] for e in c])
rn = np.asarray([e[1] for e in c])
gen = generator(rootpath, imIn[:])
size=len(imIn)
return (gen,size)
def load_data_generator_simple(rootpath, fileName, transform=[],outSize=(224,224),batch_size=BATCH_SIZE,processingTarget=None):
im = open(rootpath+fileName, 'r').readlines()
return load_data_generator_List_simple(rootpath, im[:],transform=transform,outSize=outSize,batch_size=batch_size,processingTarget=processingTarget)
def applyTransform(x,transform):
for t in transform:
x=t(x)
return x
def get_xy_from_file(rootpath, images, processingTarget=None,transform=[],outSize=(224,224),batch_size=BATCH_SIZE):
'''Extract data arrays from text file'''
X = np.zeros((len(images),3, outSize[0], outSize[1]), dtype=np.float32)
Y=[]
for i,image in enumerate(images):
currentline=image.strip().split(" ")
imFile=currentline[0]
X[i]=get_image_for_vgg(rootpath+imFile,transform,outSize)
Y.append(np.asarray(map(lambda x: float(x),currentline[1:])))
if processingTarget:
Y=processingTarget(Y)
Y=np.squeeze(np.asarray(Y)).reshape((X.shape[0],len(Y[0])))
return (X,Y)
def get_image_for_vgg(imName,transform=[],outSize=(224,224),batch_size=BATCH_SIZE):
'''Preprocess images as VGG inputs'''
im = (cv2.resize(cv2.imread(imName), (outSize[1],outSize[0]))).astype(np.float32)
# we substract the mean value of imagenet
if outSize==(224,224):
im[:,:,0] -= 103.939
im[:,:,1] -= 116.779
im[:,:,2] -= 123.68
im = im.transpose(2,0,1)
if transform:
im=applyTransform(im,transform)
im = np.expand_dims(im, axis=0)
return im