zzzheng
diff --git a/‎README.md
+79 b/‎README.md
+79
diff --git a/‎__pycache__/dataset.cpython-36.pyc
3.18 KB b/‎__pycache__/dataset.cpython-36.pyc
3.18 KB
diff --git a/‎__pycache__/dataset.cpython-37.pyc
3.44 KB b/‎__pycache__/dataset.cpython-37.pyc
3.44 KB
diff --git a/‎__pycache__/draw.cpython-37.pyc
4.81 KB b/‎__pycache__/draw.cpython-37.pyc
4.81 KB
diff --git a/‎__pycache__/model.cpython-36.pyc
3.38 KB b/‎__pycache__/model.cpython-36.pyc
3.38 KB
diff --git a/‎__pycache__/model.cpython-37.pyc
3.56 KB b/‎__pycache__/model.cpython-37.pyc
3.56 KB
diff --git a/‎__pycache__/model_parallel.cpython-37.pyc
4.2 KB b/‎__pycache__/model_parallel.cpython-37.pyc
4.2 KB
diff --git a/‎__pycache__/train.cpython-36.pyc
4.47 KB b/‎__pycache__/train.cpython-36.pyc
4.47 KB
diff --git a/‎__pycache__/train.cpython-37.pyc
4.77 KB b/‎__pycache__/train.cpython-37.pyc
4.77 KB
diff --git a/‎__pycache__/utils.cpython-36.pyc
12.8 KB b/‎__pycache__/utils.cpython-36.pyc
12.8 KB
diff --git a/‎__pycache__/utils.cpython-37.pyc
13.3 KB b/‎__pycache__/utils.cpython-37.pyc
13.3 KB
diff --git a/‎dataset.py
+124 b/‎dataset.py
+124
diff --git a/‎draw.py
+177 b/‎draw.py
+177
@@ -0,0 +1,79 @@
+# README
+
+## Requirements
+Python 3.7
+
+CUDA 10.0
+
+PyTorch 1.1
+
+
+## Troubleshooting
+
+PyTorch 1.1 has no built-in `same` padding for `Conv2d`. To get TensorFlow-style `same` padding, patch the installed library as follows:
+
+### Step 1: 
+Go to this file:
+`/venv/lib/python3.7/site-packages/torch/nn/modules/conv.py`
+
+### Step 2:
+Modify the `forward` method of `class Conv2d(_ConvNd)` so that it calls the custom function from Step 3:
+
+    class Conv2d( _ConvNd):
+
+        @weak_script_method
+        def forward(self, input):
+            #return F.conv2d(input, self.weight, self.bias, self.stride,
+            #                        self.padding, self.dilation, self.groups)
+            return conv2d_same_padding(input, self.weight, self.bias, self.stride,
+                        self.padding, self.dilation, self.groups) ## ZZ: same padding like TensorFlow
+
+### Step 3: Add custom function
+Add a custom `conv2d` function to the same file, because PyTorch 1.1 doesn't have a `padding='same'` option.
+    
+    def conv2d_same_padding(input, weight, bias=None, stride=1, padding=1, dilation=1, groups=1):
+
+        input_rows = input.size(2)
+        filter_rows = weight.size(2)
+        effective_filter_size_rows = (filter_rows - 1) * dilation[0] + 1
+        out_rows = (input_rows + stride[0] - 1) // stride[0]
+        padding_needed = max(0, (out_rows - 1) * stride[0] + effective_filter_size_rows -
+                  input_rows)
+        padding_rows = max(0, (out_rows - 1) * stride[0] +
+                        (filter_rows - 1) * dilation[0] + 1 - input_rows)
+        rows_odd = (padding_rows % 2 != 0)
+        padding_cols = max(0, (out_rows - 1) * stride[0] +
+                        (filter_rows - 1) * dilation[0] + 1 - input_rows)
+        cols_odd = (padding_rows % 2 != 0)
+
+        if rows_odd or cols_odd:
+            input = F.pad(input, [0, int(cols_odd), 0, int(rows_odd)])
+
+        return F.conv2d(input, weight, bias, stride,
+                  padding=(padding_rows // 2, padding_cols // 2),
+                  dilation=dilation, groups=groups)
+    
+
+
+
+## How to Train?
+Run the training script `main_model_parallel.py`.
+
+## Output
+                ./log
+                ./plot
+                ./checkpoints
+                ./weights
+
+## Evaluation
+https://github.com/rafaelpadilla/Object-Detection-Metrics
+
+
+## Change Log
+1. Activation function
+
+As the authors of the YOLO paper state:
+
+``We use a linear activation for the final layer and all other layers use the leaky
+rectified linear activation.``
+
@@ -0,0 +1,124 @@
+from torch.utils.data import Dataset, DataLoader
+from skimage import io
+from skimage.transform import rescale, resize, downscale_local_mean
+import matplotlib.pyplot as plt
+from train import *
+from torchvision import transforms
+from torchvision.transforms import Normalize
+
+
+data_transform = transforms.Compose([
+    # transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) # for imageNet
+    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))  # good
+])
+
+class VOC(Dataset):
+    """
+        Pascal VOC dataset.
+    Note:
+        using YOLO label format
+        https://github.com/pjreddie/darknet
+    Example:
+        voc2012 = VOC('2012_train_short.txt', 448, 448)
+        dataloader = DataLoader(voc2012, batch_size=4)
+        I = voc2012[0][0]
+        I = I.permute(1, 2, 0)
+        plt.imshow(I)
+        plt.show()
+    """
+
+    def __init__(self, txt_file, img_width=None, img_height=None, transform=None):
+        """
+
+        :param txt_file: all image directories
+        """
+        with open(txt_file, 'r') as f:
+            lines = f.readlines()
+        self.image_list = [i.rstrip('\n') for i in lines]
+        self.label_list = [str.replace('JPEGImages', 'labels').replace('.jpg', '.txt')
+                           for str in self.image_list]
+
+        self.img_width = img_width
+        self.img_height = img_height
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.image_list)
+
+    def __getitem__(self, idx):
+        # get image
+        image = io.imread(self.image_list[idx])
+
+        if self.img_width and self.img_height:
+            image = resize(image, (self.img_width, self.img_height))
+            image = torch.Tensor(image).permute(2, 0, 1)  # pytorch format: C W H
+
+        if self.transform:
+            image = self.transform(image)
+
+        # get label
+        label = read_labels(self.label_list[idx])
+        # convert to S*S*5 Tensor with format <x> <y> <w> <h> <cls>
+        label = labels2tensor(label)
+
+        # get filename
+        filename = self.image_list[idx].split('/')[-1]
+
+        return image, label, filename
+
+
+if __name__ == "__main__":
+
+
+
+    """
+    # Train on VOC
+    """
+    voc2012 = VOC('/home/bizon/Dataset/VOC_yolo_format/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT, data_transform)
+    dataloader = DataLoader(voc2012, batch_size=4)
+
+    # Model
+    yolo_model = build_darknet()
+    yolo_model.train()
+
+    # Optimize
+    learning_rate = 1e-4
+    optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate)
+
+    num_epoch = 1
+    y_out_epoch = torch.Tensor()  # record all output in a single epoch
+    img_name_epoch = []
+    for epoch in range(num_epoch):
+        for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader):
+
+            print('batch = ', i)
+            print('image  = ', image_batch.size())
+            print('label =', label_batch.size())
+
+            # foward pass
+            y_out = yolo_model(image_batch)
+            y_out_epoch = torch.cat((y_out_epoch, y_out), 0)
+            img_name_epoch += img_name_batch
+            # compute loss
+            loss = calc_loss(y_out.clone(), label_batch.clone())
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            print('\nEpoch = ', epoch, 'Batch = ', i, 'Loss = ', loss.item())
+
+        # evaluation
+        det = prediction2detection(y_out_epoch, img_name_epoch)
+        ground_truth = ground_truth_detection(voc2012.label_list)
+        res = evaluate_IOU(det, ground_truth)
+        res_tp_fp = evaluate_TP_FP(res, 0.5)
+        results, acc_tps, acc_fps = evaluate_precision_recall(res_tp_fp, 0.5, ground_truth)
+
+        print('Epoch {} done.'.format(epoch))
+        print('Acc TP for all classes = {} \n, Acc FP for all classes = {}\n'.format(acc_tps, acc_fps))
+    print('Done.')
+
+
+
+
+
+
@@ -0,0 +1,177 @@
+import os
+import numpy as np
+import cv2
+import pickle as pkl
+import random
+from utils import *
+
+classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
+           "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+
+def draw_single(img_name, label_dir='./', out_dir='./', show_flag=False):
+    """
+    Draw bounding boxes of a SINGLE image.
+
+    Note: Labels share the same name as images, using YOLO format.
+          e.g. Image = 000001.jpg
+               Label = 000001.txt
+                        format = <class> <x> <y> <w> <h>
+                        11 0.344192634561 0.611 0.416430594901 0.262
+                        14 0.509915014164 0.51 0.974504249292 0.972
+
+
+    :param img_name:    single image name / path + name
+    :param label_dir:   the corresponding label directory
+    :param out_dir:     declare output directory, which will be created if not exist.
+    :param show_flag:   display if True.
+    :return:
+    """
+    # Read image
+    file_name = img_name.split('/')[-1].split('.')[0]
+
+    img = cv2.imread(img_name)
+    height, width = img.shape[:2]
+
+    # Read label
+    labels = read_labels(os.path.join(label_dir, file_name + '.txt'))
+
+    # Color
+    colors = pkl.load(open('pallete', 'rb'))
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    m = 10
+
+    # Draw box + class
+    for l in labels:
+        cls = classes[int(l[0])]
+        upper_left_x = int((l[1] - l[3] / 2) * width)
+        upper_left_y = int((l[2] - l[4] / 2) * height)
+        bottom_right_x = int((l[1] + l[3] / 2) * width)
+        bottom_right_y = int((l[2] + l[4] / 2) * height)
+
+        color = random.choice(colors)
+        cv2.rectangle(img, (upper_left_x, upper_left_y), (bottom_right_x, bottom_right_y), color, 3)
+
+        if len(l) > 5:
+            # has confidence score
+            cv2.putText(img, cls + ' ' + str(l[5]), (upper_left_x - m, upper_left_y - m), font, 0.8, color, 2)
+        else:
+            # no confidence score
+            cv2.putText(img, cls, (upper_left_x - m, upper_left_y - m), font, 0.8, color, 2)
+
+    cv2.imwrite(os.path.join(out_dir, 'det_' + file_name + '.png'), img)
+
+    if show_flag:
+        cv2.imshow(file_name, img)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
+
+def draw(img_dir, label_dir, out_dir, show_flag=False):
+    """
+    Draw bounding boxes of MULTIPLE images.
+
+        Note: Labels share the same name as images, using YOLO format.
+          e.g. Image = 000001.jpg
+               Label = 000001.txt
+                        format = <class> <x> <y> <w> <h>
+                        11 0.344192634561 0.611 0.416430594901 0.262
+                        14 0.509915014164 0.51 0.974504249292 0.972
+
+    :param img_dir:     directory of images OR 
+                        list of image names
+    :param label_dir:   directory of labels
+    :param out_dir:     declare output directory, which will be created if not exist.
+    :param show_flag:   display if True.
+    :return:
+    """
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+        print('"{}" is created.'.format(out_dir))
+    else:
+        print('"{}" exists.'.format(out_dir))
+
+
+    # Image sources    
+    if isinstance(img_dir, list):  # from list of image names
+        img_list = img_dir
+    else:                          # from directory of images
+        img_list = os.listdir(img_dir)
+        img_list = [os.path.join(img_dir, elem) for elem in img_list]
+
+    for img_name in img_list:
+        draw_single(img_name, label_dir, out_dir, show_flag)  # core
+
+
+def visualize(y_out_epoch, img_name_epoch, image_list, out_dir, conf_threshold=0.1):
+    """
+    Visualize bbox a batch/epoch of images
+    :param y_out_epoch:         N * S * S * (B * 5+C) Tensor
+    :param img_name_epoch:      list of image name
+    :param image_list:          list of path + image_name
+    :param out_dir:             output to be stored here
+    :param conf_threshold:      filter out bbox with small confidence
+    :return:
+    """
+    assert y_out_epoch.size(0) == len(img_name_epoch)
+
+    # convert to image coordinate [0,1]
+    # #### Do ONLY once !!!
+    Tensors = [convert_coord_cell2img(y_out_epoch[i]) for i in range(y_out_epoch.size(0))]
+
+    # loop over each image
+    for k in range(y_out_epoch.size(0)):
+        T = y_out_epoch[k]
+        img_name = img_name_epoch[k]
+        res = []  # results to be write to .txt
+
+        # loop over each grid cell
+        for i in range(S):
+            for j in range(S):
+                _, cls = torch.max(T[i, j, :][-C:], 0)
+
+                best_conf = 0
+                for b in range(B):
+                    bbox = [cls.item()]
+                    bbox = bbox + T[i, j, 5*b: 5*b+5].tolist()
+
+                    if b == 0:
+                        best_bbox = bbox
+
+                    # for each grid cell, select the box with highest confidence score
+                    if T[i, j, 5*b+4] > best_conf:
+                        best_bbox = bbox
+
+                # filter out bbox with small confidence
+                if best_bbox[-1] > conf_threshold:
+                    res.append(best_bbox)
+
+        # write to file
+        with open(os.path.join(out_dir, img_name.split('.')[0] + '.txt'), 'w') as f:
+            for r in res:
+                for index in range(len(r)):
+                    if index == 0:
+                        f.write("%d " % r[index])
+                    else:
+                        f.write("%.4f " % r[index])
+                f.write("\n")
+
+    # draw box
+    draw(image_list, out_dir, out_dir)
+
+
+if __name__ == "__main__":
+    # Manual smoke test: draw the boxes of one image from the current directory.
+    # draw_single's defaults are used, so the label 000001.txt is read from './'
+    # and det_000001.png is written to './'; the result is also shown on screen.
+    # #  Single
+    img_name = '000001.jpg'
+    draw_single(img_name, show_flag = True)
+
+    # The string literal below is a disabled example of processing a whole
+    # directory; it is never executed.
+    # # Multiple
+    '''
+    img_dir = '/Users/erica/Workspace/my-yolo-implementation/data/image'
+    label_dir = '/Users/erica/Workspace/my-yolo-implementation/data/label'
+    out_dir = '/Users/erica/Workspace/my-yolo-implementation/det'
+    
+    draw(img_dir, label_dir, out_dir)
+    '''
+
+    print('Done.')