Skip to content

Commit ec6ebef

Browse files
authored
Added utils and preprocessing code
1 parent e842c00 commit ec6ebef

2 files changed

Lines changed: 511 additions & 0 deletions

File tree

Chapter 9/yolo/preprocessing.py

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
import os
2+
import cv2
3+
import copy
4+
import numpy as np
5+
import imgaug as ia
6+
from imgaug import augmenters as iaa
7+
from keras.utils import Sequence
8+
import xml.etree.ElementTree as ET
9+
from utils import BoundBox, bbox_iou
10+
11+
def parse_annotation(ann_dir, img_dir, labels=None):
    """Read every XML annotation file in ``ann_dir`` into a list of image records.

    The tag names looked for (``filename``, ``width``, ``height``, ``object``,
    ``part``, ``name``, ``bndbox``, ``xmin`` ...) match the Pascal VOC layout.

    Args:
        ann_dir: Directory containing the annotation files. Every entry of the
            directory is parsed as XML. A trailing path separator is optional.
        img_dir: Directory that is prepended to the ``filename`` found in each
            annotation. A trailing path separator is optional.
        labels: Optional collection of object names to keep. When empty or
            ``None`` every object is kept.

    Returns:
        A tuple ``(all_imgs, seen_labels)``:

        * ``all_imgs`` -- list of dicts with the keys ``filename``, ``width``,
          ``height`` and ``object`` (a list of dicts holding ``name``,
          ``xmin``, ``ymin``, ``xmax``, ``ymax``). Images without any kept
          object are omitted.
        * ``seen_labels`` -- dict mapping every object name encountered
          (including names filtered out by ``labels``) to its occurrence count.
    """
    # Avoid a shared mutable default argument.
    if labels is None:
        labels = []

    all_imgs = []
    seen_labels = {}

    for ann in sorted(os.listdir(ann_dir)):
        img = {'object': []}

        # os.path.join works with or without a trailing separator on ann_dir.
        tree = ET.parse(os.path.join(ann_dir, ann))

        for elem in tree.iter():
            if 'filename' in elem.tag:
                img['filename'] = os.path.join(img_dir, elem.text)
            if 'width' in elem.tag:
                img['width'] = int(elem.text)
            if 'height' in elem.tag:
                img['height'] = int(elem.text)
            if 'object' in elem.tag or 'part' in elem.tag:
                obj = {}

                for attr in list(elem):
                    if 'name' in attr.tag:
                        obj['name'] = attr.text

                        # Count every label, even ones that are filtered out below.
                        seen_labels[obj['name']] = seen_labels.get(obj['name'], 0) + 1

                        if len(labels) > 0 and obj['name'] not in labels:
                            # Unwanted class: drop the object entirely.
                            break

                        # obj is registered now and filled in place by the
                        # bndbox branch below.
                        img['object'].append(obj)

                    if 'bndbox' in attr.tag:
                        for dim in list(attr):
                            # Coordinates may be stored as floats; round to int pixels.
                            if 'xmin' in dim.tag:
                                obj['xmin'] = int(round(float(dim.text)))
                            if 'ymin' in dim.tag:
                                obj['ymin'] = int(round(float(dim.text)))
                            if 'xmax' in dim.tag:
                                obj['xmax'] = int(round(float(dim.text)))
                            if 'ymax' in dim.tag:
                                obj['ymax'] = int(round(float(dim.text)))

        if len(img['object']) > 0:
            all_imgs.append(img)

    return all_imgs, seen_labels
59+
60+
class BatchGenerator(Sequence):
    """Keras ``Sequence`` producing augmented image batches and YOLO-style targets.

    Each item is ``([x_batch, b_batch], y_batch)`` where

    * ``x_batch`` holds the (optionally normalised) input images,
    * ``b_batch`` holds up to ``TRUE_BOX_BUFFER`` ground-truth boxes per image,
    * ``y_batch`` holds, per grid cell and anchor, the box ``[x, y, w, h]``
      (in grid-cell units), an objectness flag and a one-hot class vector.

    ``config`` is a dict that must provide the keys ``ANCHORS``, ``BATCH_SIZE``,
    ``IMAGE_H``, ``IMAGE_W``, ``GRID_H``, ``GRID_W``, ``BOX``, ``LABELS`` and
    ``TRUE_BOX_BUFFER``.
    """

    def __init__(self, images,
                       config,
                       shuffle=True,
                       jitter=True,
                       norm=None):
        """Store the dataset description and build the augmentation pipeline.

        Args:
            images: List of image records; each record is a dict with at least
                ``filename`` and ``object`` (list of dicts with ``name``,
                ``xmin``, ``ymin``, ``xmax``, ``ymax``). The list is shuffled
                in place when ``shuffle`` is true.
            config: Configuration dict (see the class docstring for the keys).
            shuffle: Shuffle ``images`` now and again at the end of every epoch.
            jitter: Apply random scale / translate / flip and the imgaug
                pipeline to every image.
            norm: Optional callable applied to each image before it is stored
                in the batch. When ``None`` the raw image is stored with its
                boxes drawn on it (useful as a visual sanity check).
        """
        super(BatchGenerator, self).__init__()

        self.generator = None

        self.images = images
        self.config = config

        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        # ANCHORS is a flat list [w0, h0, w1, h1, ...]; anchors are compared by
        # shape only, hence they are all placed at the origin.
        self.anchors = [BoundBox(0, 0, config['ANCHORS'][2*i], config['ANCHORS'][2*i+1])
                        for i in range(int(len(config['ANCHORS'])//2))]

        ### augmentors by https://github.com/aleju/imgaug
        sometimes = lambda aug: iaa.Sometimes(0.5, aug)

        # Define our sequence of augmentation steps that will be applied to every image
        # All augmenters with per_channel=0.5 will sample one value _per image_
        # in 50% of all cases. In all other cases they will sample new values
        # _per channel_.
        # NOTE: the pipeline is applied to the image only; the bounding boxes
        # are not passed through it, so geometric augmenters stay disabled.
        self.aug_pipe = iaa.Sequential(
            [
                # apply the following augmenters to most images
                #iaa.Fliplr(0.5), # horizontally flip 50% of all images
                #iaa.Flipud(0.2), # vertically flip 20% of all images
                #sometimes(iaa.Crop(percent=(0, 0.1))), # crop images by 0-10% of their height/width
                sometimes(iaa.Affine(
                    #scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
                    #translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis)
                    #rotate=(-5, 5), # rotate by -5 to +5 degrees
                    #shear=(-5, 5), # shear by -5 to +5 degrees
                    #order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
                    #cval=(0, 255), # if mode is constant, use a cval between 0 and 255
                    #mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
                )),
                # execute 0 to 5 of the following (less important) augmenters per image
                # don't execute all of them, as that would often be way too strong
                iaa.SomeOf((0, 5),
                    [
                        #sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
                        iaa.OneOf([
                            iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                            iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                            iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
                        ]),
                        iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
                        #iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
                        # search either for all edges or for directed edges
                        #sometimes(iaa.OneOf([
                        #    iaa.EdgeDetect(alpha=(0, 0.7)),
                        #    iaa.DirectedEdgeDetect(alpha=(0, 0.7), direction=(0.0, 1.0)),
                        #])),
                        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images
                        iaa.OneOf([
                            iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels
                            #iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
                        ]),
                        #iaa.Invert(0.05, per_channel=True), # invert color channels
                        iaa.Add((-10, 10), per_channel=0.5), # change brightness of images (by -10 to 10 of original value)
                        iaa.Multiply((0.5, 1.5), per_channel=0.5), # change brightness of images (50-150% of original value)
                        iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
                        #iaa.Grayscale(alpha=(0.0, 1.0)),
                        #sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
                        #sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))) # sometimes move parts of the image around
                    ],
                    random_order=True
                )
            ],
            random_order=True
        )

        if shuffle:
            np.random.shuffle(self.images)

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(float(len(self.images))/self.config['BATCH_SIZE']))

    def num_classes(self):
        """Return the number of entries in ``config['LABELS']``."""
        return len(self.config['LABELS'])

    def size(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def load_annotation(self, i):
        """Return the boxes of image ``i`` as an array of ``[xmin, ymin, xmax, ymax, class_index]`` rows.

        An image without objects yields ``np.array([[]])``.
        """
        annots = [
            [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'],
             self.config['LABELS'].index(obj['name'])]
            for obj in self.images[i]['object']
        ]

        if len(annots) == 0:
            annots = [[]]

        return np.array(annots)

    def load_image(self, i):
        """Return image ``i`` exactly as read by ``cv2.imread`` (no resizing or augmentation)."""
        return cv2.imread(self.images[i]['filename'])

    def _find_best_anchor(self, box_w, box_h):
        """Return the index of the anchor whose shape has the highest IoU with a ``box_w`` x ``box_h`` box.

        Ties keep the earliest anchor; ``-1`` is returned when there are no anchors.
        """
        best_anchor = -1
        max_iou = -1

        # Compare shapes only: the box is moved to the origin like the anchors.
        shifted_box = BoundBox(0, 0, box_w, box_h)

        for index, anchor in enumerate(self.anchors):
            iou = bbox_iou(shifted_box, anchor)

            if max_iou < iou:
                best_anchor = index
                max_iou = iou

        return best_anchor

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            ``([x_batch, b_batch], y_batch)`` with the shapes

            * ``x_batch``: ``(B, IMAGE_H, IMAGE_W, 3)``
            * ``b_batch``: ``(B, 1, 1, 1, TRUE_BOX_BUFFER, 4)``
            * ``y_batch``: ``(B, GRID_H, GRID_W, BOX, 4 + 1 + len(LABELS))``

            where ``B`` is ``r_bound - l_bound`` as computed below.
        """
        batch_size = self.config['BATCH_SIZE']
        labels = self.config['LABELS']

        l_bound = idx*batch_size
        r_bound = (idx+1)*batch_size

        if r_bound > len(self.images):
            # Shift the window back instead of shrinking it, so the last batch
            # still spans BATCH_SIZE indices (re-using earlier samples).
            r_bound = len(self.images)
            l_bound = r_bound - batch_size

        instance_count = 0

        x_batch = np.zeros((r_bound - l_bound, self.config['IMAGE_H'], self.config['IMAGE_W'], 3))                         # input images
        b_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4))                                # buffer of TRUE_BOX_BUFFER ground-truth boxes
        y_batch = np.zeros((r_bound - l_bound, self.config['GRID_H'], self.config['GRID_W'], self.config['BOX'], 4+1+len(labels)))  # desired network output

        # Size of one grid cell in pixels (loop invariant).
        cell_w = float(self.config['IMAGE_W']) / self.config['GRID_W']
        cell_h = float(self.config['IMAGE_H']) / self.config['GRID_H']

        for train_instance in self.images[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                # skip degenerate boxes and classes that are not trained on
                if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['name'] in labels:
                    center_x = .5*(obj['xmin'] + obj['xmax'])
                    center_x = center_x / cell_w
                    center_y = .5*(obj['ymin'] + obj['ymax'])
                    center_y = center_y / cell_h

                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    if grid_x < self.config['GRID_W'] and grid_y < self.config['GRID_H']:
                        obj_indx = labels.index(obj['name'])

                        center_w = (obj['xmax'] - obj['xmin']) / cell_w # unit: grid cell
                        center_h = (obj['ymax'] - obj['ymin']) / cell_h # unit: grid cell

                        box = [center_x, center_y, center_w, center_h]

                        # find the anchor that best predicts this box
                        best_anchor = self._find_best_anchor(center_w, center_h)

                        # assign ground truth x, y, w, h, confidence and class probs to y_batch
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 4  ] = 1.
                        y_batch[instance_count, grid_y, grid_x, best_anchor, 5+obj_indx] = 1

                        # assign the true box to b_batch
                        b_batch[instance_count, 0, 0, 0, true_box_index] = box

                        # the buffer wraps around once TRUE_BOX_BUFFER boxes are stored
                        true_box_index += 1
                        true_box_index = true_box_index % self.config['TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        # img is a channel-reversed view; reversing it again gives
                        # a view OpenCV can draw into in place.
                        cv2.rectangle(img[:,:,::-1], (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
                        cv2.putText(img[:,:,::-1], obj['name'],
                                    (obj['xmin']+2, obj['ymin']+12),
                                    0, 1.2e-3 * img.shape[0],
                                    (0,255,0), 2)

                x_batch[instance_count] = img

            # increase instance counter in current batch
            instance_count += 1

        #print(' new batch created', idx)

        return [x_batch, b_batch], y_batch

    def on_epoch_end(self):
        """Reshuffle the image list in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.images)

    def aug_image(self, train_instance, jitter):
        """Load one image, optionally augment it, and resize it to the network input size.

        Args:
            train_instance: Image record with the keys ``filename`` and ``object``.
            jitter: When true, apply a random up-scale (1.0-1.1x), a random crop
                back to the original size, a random horizontal flip and the
                imgaug pipeline.

        Returns:
            ``(image, all_objs)``: the image of shape ``(IMAGE_H, IMAGE_W, 3)``
            with its channel order reversed relative to what ``cv2.imread``
            returned (OpenCV loads BGR, so this yields RGB), and a deep copy of
            the objects with coordinates mapped to the resized image and
            clamped to its bounds.

        Raises:
            IOError: If the image file cannot be read.
        """
        image_name = train_instance['filename']
        image = cv2.imread(image_name)

        if image is None:
            # cv2.imread signals failure by returning None; fail with the file
            # name instead of an AttributeError on image.shape below.
            raise IOError('Cannot find {}'.format(image_name))

        h, w, c = image.shape
        # never modify the dataset's own annotation dicts
        all_objs = copy.deepcopy(train_instance['object'])

        if jitter:
            ### scale the image
            scale = np.random.uniform() / 10. + 1.
            image = cv2.resize(image, (0,0), fx = scale, fy = scale)

            ### translate the image
            max_offx = (scale-1.) * w
            max_offy = (scale-1.) * h
            offx = int(np.random.uniform() * max_offx)
            offy = int(np.random.uniform() * max_offy)

            # crop back to the original size at a random offset
            image = image[offy : (offy + h), offx : (offx + w)]

            ### flip the image
            flip = np.random.binomial(1, .5)
            if flip > 0.5:
                image = cv2.flip(image, 1)

            image = self.aug_pipe.augment_image(image)

        # resize the image to standard size; cv2.resize expects dsize as (width, height)
        image = cv2.resize(image, (self.config['IMAGE_W'], self.config['IMAGE_H']))
        image = image[:,:,::-1]

        # fix object's position and size
        for obj in all_objs:
            for attr in ['xmin', 'xmax']:
                if jitter:
                    obj[attr] = int(obj[attr] * scale - offx)

                obj[attr] = int(obj[attr] * float(self.config['IMAGE_W']) / w)
                obj[attr] = max(min(obj[attr], self.config['IMAGE_W']), 0)

            for attr in ['ymin', 'ymax']:
                if jitter:
                    obj[attr] = int(obj[attr] * scale - offy)

                obj[attr] = int(obj[attr] * float(self.config['IMAGE_H']) / h)
                obj[attr] = max(min(obj[attr], self.config['IMAGE_H']), 0)

            if jitter and flip > 0.5:
                # mirror the box horizontally to follow the flipped image
                xmin = obj['xmin']
                obj['xmin'] = self.config['IMAGE_W'] - obj['xmax']
                obj['xmax'] = self.config['IMAGE_W'] - xmin

        return image, all_objs

0 commit comments

Comments
 (0)