
Commit 111610d
Commit message: finish
1 parent: 26c0cd5

13 files changed: +51 / -50 lines changed

data/config.py

Lines changed: 6 additions & 3 deletions

@@ -35,7 +35,9 @@
 _C.filter_min_face = True

 # train config
-_C.LR_STEPS = (120, 198, 250)
+#_C.LR_STEPS = (120, 198, 250)
+_C.MAX_STEPS = 200000
+_C.LR_STEPS = (80000,100000,120000)
 _C.EPOCHES = 300

 # anchor config
@@ -48,8 +50,9 @@

 # detection config
 _C.NMS_THRESH = 0.3
-_C.TOP_K = 500
-_C.CONF_THRESH = 0.01
+_C.NMS_TOP_K = 5000
+_C.TOP_K = 750
+_C.CONF_THRESH = 0.05

 # loss config
 _C.NEG_POS_RATIOS = 3
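Note: the training schedule now decays by iteration rather than by epoch (the old epoch-based steps are kept as a comment), and MAX_STEPS caps the run at 200,000 iterations. A minimal, self-contained sketch of how such a schedule plays out, assuming the usual decay factor of 0.1 (base_lr and gamma are illustrative values, not part of this commit):

# Hypothetical walk-through of the iteration-based schedule defined above.
MAX_STEPS = 200000
LR_STEPS = (80000, 100000, 120000)
base_lr, gamma = 1e-3, 0.1            # illustrative defaults, not from this commit

lr, step_index = base_lr, 0
for iteration in range(MAX_STEPS):
    if iteration in LR_STEPS:         # exact-match check, mirroring train.py below
        step_index += 1
        lr = base_lr * gamma ** step_index
print(step_index, lr)                 # 3 decay steps taken, lr ~1e-6 at the end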

demo.py

Lines changed: 2 additions & 2 deletions

@@ -53,11 +53,11 @@ def detect(net, img_path, thresh):
     img = np.array(img)
     height, width, _ = img.shape
     max_im_shrink = np.sqrt(
-        1800 * 1200 / (img.shape[0] * img.shape[1]))
+        1700 * 1200 / (img.shape[0] * img.shape[1]))
     image = cv2.resize(img, None, None, fx=max_im_shrink,
                        fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)
     #image = cv2.resize(img, (640, 640))
-    x = to_chw_bgr(img)
+    x = to_chw_bgr(image)
     x = x.astype('float32')
     x -= cfg.img_mean
     x = x[[2, 1, 0], :, :]
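Two fixes in detect(): the resize cap drops from 1800x1200 to 1700x1200 pixels of area, and the network input is now built from the resized image rather than the original img (previously the resize was computed but never used). A small sketch of the shrink computation with an illustrative input size:

import numpy as np

h, w = 2048, 1536                                        # illustrative image size
max_im_shrink = np.sqrt(1700 * 1200 / (h * w))           # scale so the resized area is ~1700*1200
print(round(max_im_shrink, 3))                           # ~0.805 for this example
print(int(h * max_im_shrink), int(w * max_im_shrink))    # ~1649 x 1236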

layers/functions/detection.py

Lines changed: 8 additions & 13 deletions

@@ -23,7 +23,7 @@ def __init__(self, cfg):
         self.nms_thresh = cfg.NMS_THRESH
         self.conf_thresh = cfg.CONF_THRESH
         self.variance = cfg.VARIANCE
-        self.use_nms = cfg.USE_NMS
+        self.nms_top_k = cfg.NMS_TOP_K

     def forward(self, loc_data, conf_data, prior_data):
         """
@@ -57,21 +57,16 @@ def forward(self, loc_data, conf_data, prior_data):
             for cl in range(1, self.num_classes):
                 c_mask = conf_scores[cl].gt(self.conf_thresh)
                 scores = conf_scores[cl][c_mask]
-
+
                 if scores.dim() == 0:
                     continue
                 l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                 boxes_ = boxes[l_mask].view(-1, 4)
-                if self.use_nms:
-                    ids, count = nms(
-                        boxes_, scores, self.nms_thresh, self.top_k)
-                    output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1),
-                                                       boxes_[ids[:count]]), 1)
-                else:
-                    sort_scores, idx = scores.sort(0,descending=True)
-                    count = sort_scores.size(
-                        0) if sort_scores.size(0) < self.top_k else self.top_k
-                    output[i, cl,:count] = torch.cat(
-                        (sort_scores[:count].unsqueeze(1), boxes_[idx[:count]]), 1)
+                ids, count = nms(
+                    boxes_, scores, self.nms_thresh, self.nms_top_k)
+                count = count if count < self.top_k else self.top_k
+
+                output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1),
+                                                   boxes_[ids[:count]]), 1)

         return output
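Detect.forward now always applies NMS: up to NMS_TOP_K (5000) thresholded candidates go through NMS, and at most TOP_K (750) survivors per class are written to the output (the old sort-only fallback is gone). A self-contained sketch of that threshold-then-cap flow; the actual NMS call is elided and stood in for by a score sort:

import torch

def cap_detections(boxes, scores, nms_top_k=5000, top_k=750, conf_thresh=0.05):
    # Keep confident boxes, take at most nms_top_k candidates (NMS would run here),
    # then cap the per-class output at top_k, as in the code above.
    keep = scores > conf_thresh
    boxes, scores = boxes[keep], scores[keep]
    order = scores.sort(descending=True)[1][:nms_top_k]
    order = order[:top_k]
    return torch.cat((scores[order].unsqueeze(1), boxes[order]), 1)

dets = cap_detections(torch.rand(2000, 4), torch.rand(2000))
print(dets.shape)   # at most (750, 5): score plus 4 box coordinates per row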

s3fd.py

Lines changed: 4 additions & 4 deletions

@@ -187,7 +187,7 @@ def weights_init(self, m):
             m.bias.data.zero_()


-vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
+vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
            512, 512, 512, 'M']

 extras_cfg = [256, 'S', 512, 128, 'S', 256]
@@ -208,7 +208,7 @@ def vgg(cfg, i, batch_norm=False):
         else:
             layers += [conv2d, nn.ReLU(inplace=True)]
         in_channels = v
-    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
+    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=3, dilation=3)
     conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
     layers += [conv6,
                nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
@@ -240,7 +240,7 @@ def multibox(vgg, extra_layers, num_classes):
     loc_layers += [nn.Conv2d(vgg[14].out_channels, 4,
                              kernel_size=3, padding=1)]
     conf_layers += [nn.Conv2d(vgg[14].out_channels,
-                              (num_classes - 1) * 3 + 1, kernel_size=3, padding=1)]
+                              3 + (num_classes-1), kernel_size=3, padding=1)]

     for k, v in enumerate(vgg_source):
         loc_layers += [nn.Conv2d(vgg[v].out_channels,
@@ -258,7 +258,7 @@ def multibox(vgg, extra_layers, num_classes):
 def build_s3fd(phase, num_classes=2):
     base_, extras_, head_ = multibox(
         vgg(vgg_cfg, 3), add_extras((extras_cfg), 1024), num_classes)
-
+
     return S3FD(phase, base_, extras_, head_, num_classes)

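Three architecture tweaks: the 'C' entry switches the max-pool after conv3_3 to ceil mode so odd-sized feature maps round up instead of down, conv6 drops its dilation from 6 to 3 (padding equals dilation, so the spatial size is unchanged), and the first confidence head now predicts 3 + (num_classes - 1) channels, which is consistent with the max-out background label from the S3FD paper. A short sketch, assuming the common SSD-style handling of 'C' (this repository's exact vgg() branch is not shown in the diff):

import torch
import torch.nn as nn

# Assumed 'C' branch, as in SSD-style vgg() builders:
#   elif v == 'C':
#       layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
pool_floor = nn.MaxPool2d(kernel_size=2, stride=2)                   # 'M'
pool_ceil = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)    # 'C'
x = torch.randn(1, 256, 159, 159)                                    # odd spatial size
print(pool_floor(x).shape[-1], pool_ceil(x).shape[-1])               # 79 vs 80

# With kernel_size=3, setting padding equal to dilation preserves the spatial size,
# so the dilation change only shrinks conv6's receptive field (13 -> 7).
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=3, dilation=3)
print(conv6(torch.randn(1, 512, 20, 20)).shape)   # torch.Size([1, 1024, 20, 20])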

tmp/0_Parade_marchingband_1_465.jpg

Binary file not shown (-301 KB).

tmp/test.jpg

Binary file not shown (-15.2 KB).

tmp/test2.jpg

Binary file not shown (-93 KB).

tools/afw_test.py

Lines changed: 2 additions & 2 deletions

@@ -38,8 +38,8 @@
 else:
     torch.set_default_tensor_type('torch.FloatTensor')

-AFW_IMG_DIR = os.path.join(cfg.AFW_DIR, 'images')
-AFW_RESULT_DIR = os.path.join(cfg.AFW_DIR, 's3fd')
+AFW_IMG_DIR = os.path.join(cfg.FACE.AFW_DIR, 'images')
+AFW_RESULT_DIR = os.path.join(cfg.FACE.AFW_DIR, 's3fd')
 AFW_RESULT_IMG_DIR = os.path.join(AFW_RESULT_DIR, 'images')

 if not os.path.exists(AFW_RESULT_IMG_DIR):
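The evaluation scripts now read dataset roots from a nested FACE group on the config; the same change appears in fddb_test.py, pascal_test.py and wider_test.py below. A minimal sketch of the assumed layout with placeholder paths (the container type and the paths are assumptions, not part of this commit):

import os
from easydict import EasyDict as edict   # assumed; data/config.py may use its own container

cfg = edict()
cfg.FACE = edict()
cfg.FACE.AFW_DIR = '/path/to/AFW'         # placeholder paths
cfg.FACE.FDDB_DIR = '/path/to/FDDB'
cfg.FACE.PASCAL_DIR = '/path/to/PASCAL_FACE'
cfg.FACE.WIDER_DIR = '/path/to/WIDER'

print(os.path.join(cfg.FACE.AFW_DIR, 'images'))   # as consumed in afw_test.py above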

tools/fddb_test.py

Lines changed: 3 additions & 3 deletions

@@ -39,9 +39,9 @@
     torch.set_default_tensor_type('torch.FloatTensor')


-FDDB_IMG_DIR = os.path.join(cfg.FDDB_DIR, 'images')
-FDDB_FOLD_DIR = os.path.join(cfg.FDDB_DIR, 'FDDB-folds')
-FDDB_RESULT_DIR = os.path.join(cfg.FDDB_DIR, 's3fd')
+FDDB_IMG_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'images')
+FDDB_FOLD_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'FDDB-folds')
+FDDB_RESULT_DIR = os.path.join(cfg.FACE.FDDB_DIR, 's3fd')
 FDDB_RESULT_IMG_DIR = os.path.join(FDDB_RESULT_DIR, 'images')

 if not os.path.exists(FDDB_RESULT_IMG_DIR):

tools/pascal_test.py

Lines changed: 2 additions & 2 deletions

@@ -38,8 +38,8 @@
 else:
     torch.set_default_tensor_type('torch.FloatTensor')

-PASCAL_IMG_DIR = os.path.join(cfg.PASCAL_DIR, 'images')
-PASCAL_RESULT_DIR = os.path.join(cfg.PASCAL_DIR, 's3fd')
+PASCAL_IMG_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 'images')
+PASCAL_RESULT_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 's3fd')
 PASCAL_RESULT_IMG_DIR = os.path.join(PASCAL_RESULT_DIR, 'images')

 if not os.path.exists(PASCAL_RESULT_IMG_DIR):

tools/wider_test.py

Lines changed: 3 additions & 3 deletions

@@ -46,7 +46,7 @@ def detect_face(net, img, shrink):
     img = cv2.resize(img, None, None, fx=shrink, fy=shrink,
                      interpolation=cv2.INTER_LINEAR)

-    x = to_chw_bgr(image)
+    x = to_chw_bgr(img)
     x = x.astype('float32')
     x -= cfg.img_mean
     x = x[[2, 1, 0], :, :]
@@ -170,7 +170,7 @@ def get_data():
     del wider_face

     imgs_path = os.path.join(
-        cfg.WIDER_DIR, 'WIDER_{}'.format(subset), 'images')
+        cfg.FACE.WIDER_DIR, 'WIDER_{}'.format(subset), 'images')
     save_path = 'eval_tools/s3fd_{}'.format(subset)

     return event_list, file_list, imgs_path, save_path
@@ -209,7 +209,7 @@ def get_data():
         # (img.shape[0] * img.shape[1])) ** 0.5

         max_im_shrink = np.sqrt(
-            1650 * 1200 / (img.shape[0] * img.shape[1]))
+            1700 * 1200 / (img.shape[0] * img.shape[1]))

         shrink = max_im_shrink if max_im_shrink < 1 else 1
         counter += 1

train.py

Lines changed: 16 additions & 13 deletions

@@ -21,7 +21,7 @@
 from layers.modules import MultiBoxLoss
 from data.factory import dataset_factory, detection_collate

-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+#os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


 def str2bool(v):
@@ -31,7 +31,7 @@ def str2bool(v):
     description='S3FD face Detector Training With Pytorch')
 train_set = parser.add_mutually_exclusive_group()
 parser.add_argument('--dataset',
-                    default='hand',
+                    default='face',
                     choices=['hand', 'face', 'head'],
                     help='Train target')
 parser.add_argument('--basenet',
@@ -44,7 +44,7 @@ def str2bool(v):
                     default=None, type=str,
                     help='Checkpoint state_dict file to resume training from')
 parser.add_argument('--num_workers',
-                    default=8, type=int,
+                    default=4, type=int,
                     help='Number of workers used in dataloading')
 parser.add_argument('--cuda',
                     default=True, type=str2bool,
@@ -104,6 +104,7 @@ def str2bool(v):
 s3fd_net = build_s3fd('train', cfg.NUM_CLASSES)
 net = s3fd_net

+
 if args.resume:
     print('Resuming training, loading {}...'.format(args.resume))
     start_epoch = net.load_weights(args.resume)
@@ -121,9 +122,9 @@ def str2bool(v):

 if not args.resume:
     print('Initializing weights...')
-    net.extras.apply(net.weights_init)
-    net.loc.apply(net.weights_init)
-    net.conf.apply(net.weights_init)
+    s3fd_net.extras.apply(s3fd_net.weights_init)
+    s3fd_net.loc.apply(s3fd_net.weights_init)
+    s3fd_net.conf.apply(s3fd_net.weights_init)

 optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                       weight_decay=args.weight_decay)
@@ -138,8 +139,7 @@ def train():
     iteration = 0
     net.train()
     for epoch in range(start_epoch, cfg.EPOCHES):
-        loc_loss = 0
-        conf_loss = 0
+        losses = 0
         for batch_idx, (images, targets) in enumerate(train_loader):
             if args.cuda:
                 images = Variable(images.cuda())
@@ -149,7 +149,7 @@ def train():
                 images = Variable(images)
                 targets = [Variable(ann, volatile=True) for ann in targets]

-            if epoch in cfg.LR_STEPS:
+            if iteration in cfg.LR_STEPS:
                 step_index += 1
                 adjust_learning_rate(optimizer, args.gamma, step_index)

@@ -162,15 +162,16 @@ def train():
             loss.backward()
             optimizer.step()
             t1 = time.time()
-            loc_loss += loss_l.data[0]
-            conf_loss += loss_c.data[0]
+            losses += loss.data[0]

             if iteration % 10 == 0:
+                tloss = losses / (batch_idx + 1)
                 print('Timer: %.4f' % (t1 - t0))
                 print('epoch:' + repr(epoch) + ' || iter:' +
-                      repr(iteration) + ' || Loss:%.4f' % (loss.data[0]))
+                      repr(iteration) + ' || Loss:%.4f' % (tloss))
                 print('->> conf loss:{:.4f} || loc loss:{:.4f}'.format(
                     loss_c.data[0], loss_l.data[0]))
+                print('->>lr:{:.6f}'.format(optimizer.param_groups[0]['lr']))

             if iteration != 0 and iteration % 5000 == 0:
                 print('Saving state, iter:', iteration)
@@ -180,6 +181,8 @@ def train():
             iteration += 1

         val(epoch)
+        if iteration == cfg.MAX_STEPS:
+            break


 def val(epoch):
@@ -224,7 +227,7 @@ def val(epoch):
     file = 'sfd_{}_checkpoint.pth'.format(args.dataset)
     torch.save(states, os.path.join(
         args.save_folder, file))
-
+

 def adjust_learning_rate(optimizer, gamma, step):
     """Sets the learning rate to the initial LR decayed by 10 at every

utils/augmentations.py

Lines changed: 5 additions & 5 deletions

@@ -568,7 +568,7 @@ def anchor_crop_image_sampling(img,
     infDistance = 9999999
     bbox_labels = np.array(bbox_labels)
     scale = np.array([img_width, img_height, img_width, img_height])
-
+
     boxes = bbox_labels[:, 1:5] * scale
     labels = bbox_labels[:, 0]

@@ -828,10 +828,10 @@ def preprocess(img, bbox_labels, mode, image_path):

     img = Image.fromarray(img)

-    interp_mode = [
-        Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC,
-        Image.LANCZOS
-    ]
+    interp_mode = [
+        Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC,
+        Image.LANCZOS
+    ]
     interp_indx = np.random.randint(0, 5)

     img = img.resize((cfg.resize_width, cfg.resize_height),
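The change here is whitespace-only: the interp_mode list is re-indented. For context, the surrounding code appears to pick one of five PIL resampling filters at random for each resize; a tiny runnable sketch of that pattern, with a placeholder image and a fixed target size standing in for cfg.resize_width/cfg.resize_height:

import numpy as np
from PIL import Image

interp_mode = [
    Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC,
    Image.LANCZOS
]
img = Image.new('RGB', (123, 77))                        # placeholder image
interp_indx = np.random.randint(0, 5)                    # uniform over the five filters
img = img.resize((640, 640), interp_mode[interp_indx])   # 640x640 stands in for cfg values
print(img.size)                                          # (640, 640)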
