Skip to content

Commit 94629e4

Browse files
authored
Add files via upload
first commit
0 parents  commit 94629e4

22 files changed

+1626
-0
lines changed

README.md

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# README
2+
3+
## Requirements
4+
Python 3.7
5+
6+
CUDA 10.0
7+
8+
PyTorch 1.1
9+
10+
11+
## Troubleshooting
12+
13+
PyTorch does not provide TensorFlow-style `same` padding for convolutions, so patch it in as follows:
14+
15+
### Step 1:
16+
Go to this file:
17+
`/venv/lib/python3.7/site-packages/torch/nn/modules/conv.py`
18+
19+
### Step 2:
20+
Modify `forward` function in `class Conv2d( _ConvNd)`
21+
22+
class Conv2d( _ConvNd):
23+
24+
@weak_script_method
25+
def forward(self, input):
26+
#return F.conv2d(input, self.weight, self.bias, self.stride,
27+
# self.padding, self.dilation, self.groups)
28+
return conv2d_same_padding(input, self.weight, self.bias, self.stride,
29+
self.padding, self.dilation, self.groups) ## ZZ: same padding like TensorFlow
30+
31+
### Step 3: Add custom function
32+
Add a custom `conv2d`, because PyTorch doesn't have a `padding='same'` option.
33+
34+
def conv2d_same_padding(input, weight, bias=None, stride=1, padding=1, dilation=1, groups=1):
    """
    2-D convolution with TensorFlow-style ``SAME`` padding.

    The total padding along each spatial axis is chosen so that the output
    size is ``ceil(input_size / stride)``. When the total is odd, the extra
    row/column is added at the bottom/right, and the remaining (even)
    padding is split symmetrically by ``F.conv2d``.

    :param input: input tensor, indexed as (N, C, rows, cols)
    :param weight: filter tensor, indexed as (out, in / groups, rows, cols)
    :param bias: optional bias tensor forwarded to ``F.conv2d``
    :param stride: int or (rows, cols) pair
    :param padding: ignored; kept so the signature matches ``F.conv2d``
    :param dilation: int or (rows, cols) pair
    :param groups: forwarded to ``F.conv2d``
    :return: result of ``F.conv2d`` on the padded input
    """
    # Conv2d passes pairs, but the declared defaults are plain ints.
    if isinstance(stride, int):
        stride = (stride, stride)
    if isinstance(dilation, int):
        dilation = (dilation, dilation)

    input_rows, input_cols = input.size(2), input.size(3)
    filter_rows, filter_cols = weight.size(2), weight.size(3)

    # Target output size: ceil(input / stride), per axis.
    out_rows = (input_rows + stride[0] - 1) // stride[0]
    out_cols = (input_cols + stride[1] - 1) // stride[1]

    # Total padding needed per axis (rows and columns are independent).
    padding_rows = max(0, (out_rows - 1) * stride[0] +
                       (filter_rows - 1) * dilation[0] + 1 - input_rows)
    padding_cols = max(0, (out_cols - 1) * stride[1] +
                       (filter_cols - 1) * dilation[1] + 1 - input_cols)

    rows_odd = (padding_rows % 2 != 0)
    cols_odd = (padding_cols % 2 != 0)

    # F.conv2d only pads symmetrically, so put the odd leftover on the
    # right/bottom first. F.pad order is [left, right, top, bottom].
    if rows_odd or cols_odd:
        input = F.pad(input, [0, int(cols_odd), 0, int(rows_odd)])

    return F.conv2d(input, weight, bias, stride,
                    padding=(padding_rows // 2, padding_cols // 2),
                    dilation=dilation, groups=groups)
55+
56+
57+
58+
59+
## How to Train?
60+
`main_model_parallel.py`
61+
62+
## Output
63+
./log
64+
./plot
65+
./checkpoints
66+
./weights
67+
68+
## Evaluation
69+
https://github.com/rafaelpadilla/Object-Detection-Metrics
70+
71+
72+
## Change Log
73+
1. Activation function
74+
75+
As the author mentioned:
76+
77+
``We use a linear activation for the final layer and all other layers use the leaky
78+
rectified linear activation.``
79+

__pycache__/dataset.cpython-36.pyc

3.18 KB
Binary file not shown.

__pycache__/dataset.cpython-37.pyc

3.44 KB
Binary file not shown.

__pycache__/draw.cpython-37.pyc

4.81 KB
Binary file not shown.

__pycache__/model.cpython-36.pyc

3.38 KB
Binary file not shown.

__pycache__/model.cpython-37.pyc

3.56 KB
Binary file not shown.
4.2 KB
Binary file not shown.

__pycache__/train.cpython-36.pyc

4.47 KB
Binary file not shown.

__pycache__/train.cpython-37.pyc

4.77 KB
Binary file not shown.

__pycache__/utils.cpython-36.pyc

12.8 KB
Binary file not shown.

__pycache__/utils.cpython-37.pyc

13.3 KB
Binary file not shown.

dataset.py

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from torch.utils.data import Dataset, DataLoader
2+
from skimage import io
3+
from skimage.transform import rescale, resize, downscale_local_mean
4+
import matplotlib.pyplot as plt
5+
from train import *
6+
from torchvision import transforms
7+
from torchvision.transforms import Normalize
8+
9+
10+
# Per-channel normalisation applied to image tensors: (x - 0.5) / 0.5.
# The ImageNet statistics, mean=(0.485, 0.456, 0.406) and
# std=(0.229, 0.224, 0.225), are the alternative that was considered.
data_transform = transforms.Compose(
    [transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3)]
)
14+
15+
class VOC(Dataset):
    """
    Pascal VOC dataset.

    Note:
        using YOLO label format
        https://github.com/pjreddie/darknet

    Each image path listed in the text file is mapped to its label path by
    replacing ``JPEGImages`` with ``labels`` and ``.jpg`` with ``.txt``.

    Example:
        voc2012 = VOC('2012_train_short.txt', 448, 448)
        dataloader = DataLoader(voc2012, batch_size=4)
        I = voc2012[0][0]
        I = I.permute(1, 2, 0)
        plt.imshow(I)
        plt.show()
    """

    def __init__(self, txt_file, img_width=None, img_height=None, transform=None):
        """
        :param txt_file: text file with one image path per line
        :param img_width: target width; resizing happens only when both
                          img_width and img_height are given
        :param img_height: target height (see img_width)
        :param transform: optional callable applied to the image tensor
        """
        with open(txt_file, 'r') as f:
            # Skip blank lines (e.g. a trailing newline) so they do not turn
            # into empty image paths.
            self.image_list = [line.strip() for line in f if line.strip()]
        self.label_list = [path.replace('JPEGImages', 'labels').replace('.jpg', '.txt')
                           for path in self.image_list]

        self.img_width = img_width
        self.img_height = img_height
        self.transform = transform

    def __len__(self):
        """Return the number of images listed in the text file."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """
        Load one sample.

        :param idx: index into the image list
        :return: tuple (image, label, filename); image is a C x H x W
                 tensor, label is whatever ``labels2tensor`` returns and
                 filename is the last '/'-separated component of the path
        """
        # get image
        image = io.imread(self.image_list[idx])

        if self.img_width and self.img_height:
            # skimage takes the output shape as (rows, cols) = (height, width)
            image = resize(image, (self.img_height, self.img_width))
        image = torch.Tensor(image).permute(2, 0, 1)  # H x W x C -> C x H x W

        if self.transform:
            image = self.transform(image)

        # get label
        label = read_labels(self.label_list[idx])
        # convert to S*S*5 Tensor with format <x> <y> <w> <h> <cls>
        # (as stated by the original author; labels2tensor is defined elsewhere)
        label = labels2tensor(label)

        # get filename
        filename = self.image_list[idx].split('/')[-1]

        return image, label, filename
68+
69+
70+
if __name__ == "__main__":

    # Smoke test: train the model on a short VOC list, then run the
    # detection evaluation helpers on the collected network outputs.
    """
    # Train on VOC
    """
    voc2012 = VOC('/home/bizon/Dataset/VOC_yolo_format/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT, data_transform)
    dataloader = DataLoader(voc2012, batch_size=4)

    # Model
    yolo_model = build_darknet()
    yolo_model.train()

    # Optimize
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate)

    num_epoch = 1
    # NOTE(review): both accumulators are created once, before the epoch loop,
    # so with num_epoch > 1 they would keep growing across epochs - confirm
    # whether they should be reset per epoch.
    y_out_epoch = torch.Tensor()  # record all output in a single epoch
    img_name_epoch = []
    for epoch in range(num_epoch):
        for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader):

            print('batch = ', i)
            print('image = ', image_batch.size())
            print('label =', label_batch.size())

            # forward pass
            y_out = yolo_model(image_batch)
            # NOTE(review): y_out is concatenated without detach(), so it
            # presumably still carries autograd history - confirm this is
            # intended for tensors that are only used for evaluation.
            y_out_epoch = torch.cat((y_out_epoch, y_out), 0)
            img_name_epoch += img_name_batch
            # compute loss (on copies of the output and the labels)
            loss = calc_loss(y_out.clone(), label_batch.clone())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('\nEpoch = ', epoch, 'Batch = ', i, 'Loss = ', loss.item())

        # evaluation: the helpers below are defined elsewhere; 0.5 is the
        # threshold argument passed to both TP/FP and precision/recall steps
        det = prediction2detection(y_out_epoch, img_name_epoch)
        ground_truth = ground_truth_detection(voc2012.label_list)
        res = evaluate_IOU(det, ground_truth)
        res_tp_fp = evaluate_TP_FP(res, 0.5)
        results, acc_tps, acc_fps = evaluate_precision_recall(res_tp_fp, 0.5, ground_truth)

        print('Epoch {} done.'.format(epoch))
        print('Acc TP for all classes = {} \n, Acc FP for all classes = {}\n'.format(acc_tps, acc_fps))
    print('Done.')
119+
120+
121+
122+
123+
124+

draw.py

+177
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
import os
2+
import numpy as np
3+
import cv2
4+
import pickle as pkl
5+
import random
6+
from utils import *
7+
8+
# Class names; a label's integer class id is used as an index into this list.
classes = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
10+
11+
12+
def draw_single(img_name, label_dir='./', out_dir='./', show_flag=False):
    """
    Draw bounding boxes of a SINGLE image.

    Note: Labels share the same name as images, using YOLO format.
    e.g. Image = 000001.jpg
         Label = 000001.txt
         format = <class> <x> <y> <w> <h>
             11 0.344192634561 0.611 0.416430594901 0.262
             14 0.509915014164 0.51 0.974504249292 0.972

    An optional sixth value per label is treated as a confidence score and
    drawn next to the class name. The result is written to
    ``<out_dir>/det_<name>.png``.

    :param img_name: single image name / path + name
    :param label_dir: the corresponding label directory
    :param out_dir: declare output directory, which will be created if not exist.
    :param show_flag: display if True.
    :return: None
    :raises OSError: if the image cannot be read.
    """
    # Read image
    file_name = img_name.split('/')[-1].split('.')[0]

    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('Cannot read image "{}".'.format(img_name))
    height, width = img.shape[:2]

    # Read label
    labels = read_labels(os.path.join(label_dir, file_name + '.txt'))

    # Color
    # NOTE: the palette is unpickled; only use a 'pallete' file you trust.
    with open('pallete', 'rb') as palette_file:
        colors = pkl.load(palette_file)
    font = cv2.FONT_HERSHEY_SIMPLEX
    m = 10  # offset (pixels) of the text from the box's upper-left corner

    # Draw box + class
    for label in labels:
        cls = classes[int(label[0])]
        # Labels hold the box centre and size relative to the image size;
        # convert them to absolute corner coordinates.
        upper_left_x = int((label[1] - label[3] / 2) * width)
        upper_left_y = int((label[2] - label[4] / 2) * height)
        bottom_right_x = int((label[1] + label[3] / 2) * width)
        bottom_right_y = int((label[2] + label[4] / 2) * height)

        color = random.choice(colors)
        cv2.rectangle(img, (upper_left_x, upper_left_y), (bottom_right_x, bottom_right_y), color, 3)

        if len(label) > 5:
            # has confidence score
            text = cls + ' ' + str(label[5])
        else:
            # no confidence score
            text = cls
        cv2.putText(img, text, (upper_left_x - m, upper_left_y - m), font, 0.8, color, 2)

    # Create the output directory as promised by the docstring; writing into
    # a missing directory would otherwise fail.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    cv2.imwrite(os.path.join(out_dir, 'det_' + file_name + '.png'), img)

    if show_flag:
        cv2.imshow(file_name, img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
68+
69+
70+
def draw(img_dir, label_dir, out_dir, show_flag=False):
    """
    Draw bounding boxes of MULTIPLE images by calling draw_single on each.

    Note: Labels share the same name as images, using YOLO format.
    e.g. Image = 000001.jpg
         Label = 000001.txt
         format = <class> <x> <y> <w> <h>
             11 0.344192634561 0.611 0.416430594901 0.262
             14 0.509915014164 0.51 0.974504249292 0.972

    :param img_dir: directory of images OR
                    list of image names
    :param label_dir: directory of labels
    :param out_dir: declare output directory, which will be created if not exist.
    :param show_flag: display if True.
    :return: None
    """
    # Make sure the output directory is there and report which case applied.
    if os.path.exists(out_dir):
        print('"{}" exists.'.format(out_dir))
    else:
        os.makedirs(out_dir)
        print('"{}" is created.'.format(out_dir))

    # A list is taken as ready-made image paths; anything else is treated
    # as a directory whose entries are all drawn.
    if isinstance(img_dir, list):
        image_paths = img_dir
    else:
        image_paths = [os.path.join(img_dir, entry) for entry in os.listdir(img_dir)]

    for image_path in image_paths:
        draw_single(image_path, label_dir, out_dir, show_flag)  # core
104+
105+
106+
def visualize(y_out_epoch, img_name_epoch, image_list, out_dir, conf_threshold=0.1):
    """
    Visualize bbox a batch/epoch of images.

    For every image, the most confident box of each grid cell is kept if its
    confidence exceeds ``conf_threshold``. The kept boxes are written to
    ``<out_dir>/<image name without extension>.txt`` as
    ``<class> <x> <y> <w> <h> <conf>`` lines, and the boxes are then drawn
    with ``draw``.

    :param y_out_epoch: N * S * S * (B * 5+C) Tensor
    :param img_name_epoch: list of image name
    :param image_list: list of path + image_name
    :param out_dir: output to be stored here
    :param conf_threshold: filter out bbox with small confidence
    :return: None
    """
    assert y_out_epoch.size(0) == len(img_name_epoch)

    # The label files are written before draw() gets a chance to create the
    # directory, so make sure it exists up front.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # loop over each image
    for k, img_name in enumerate(img_name_epoch):
        # convert to image coordinate [0,1] -- done exactly once per image.
        # convert_coord_cell2img is defined elsewhere: use its return value,
        # and fall back to the slice itself in case it converts in place
        # and returns nothing.
        converted = convert_coord_cell2img(y_out_epoch[k])
        T = converted if converted is not None else y_out_epoch[k]
        res = []  # results to be written to .txt

        # loop over each grid cell
        for i in range(S):
            for j in range(S):
                cell = T[i, j]
                # predicted class = arg max over the last C entries
                _, cls = torch.max(cell[-C:], 0)

                best_bbox = None
                best_conf = 0
                for b in range(B):
                    # [cls, x, y, w, h, conf] of the b-th box of this cell
                    bbox = [cls.item()] + cell[5 * b: 5 * b + 5].tolist()
                    conf = bbox[-1]

                    if b == 0:
                        best_bbox = bbox

                    # for each grid cell, select the box with highest confidence score
                    if conf > best_conf:
                        best_bbox = bbox
                        best_conf = conf

                # filter out bbox with small confidence
                if best_bbox is not None and best_bbox[-1] > conf_threshold:
                    res.append(best_bbox)

        # write to file: integer class id followed by 4-decimal floats
        with open(os.path.join(out_dir, img_name.split('.')[0] + '.txt'), 'w') as f:
            for r in res:
                f.write("%d " % r[0])
                for value in r[1:]:
                    f.write("%.4f " % value)
                f.write("\n")

    # draw box (the label files just written live in out_dir)
    draw(image_list, out_dir, out_dir)
161+
162+
163+
if __name__ == "__main__":
    # Single-image demo: draw the boxes of one image with the default
    # label/output directories and show the result in a window.
    draw_single('000001.jpg', show_flag=True)

    # Multi-image variant, kept for reference:
    #   img_dir = '/Users/erica/Workspace/my-yolo-implementation/data/image'
    #   label_dir = '/Users/erica/Workspace/my-yolo-implementation/data/label'
    #   out_dir = '/Users/erica/Workspace/my-yolo-implementation/det'
    #
    #   draw(img_dir, label_dir, out_dir)

    print('Done.')

0 commit comments

Comments
 (0)