import argparse
import threading, queue
from torch.multiprocessing import Pool, Process, set_start_method
+ from util import write_results, load_classes
+ from preprocess import letterbox_image
from darknet import Darknet
from imutils.video import WebcamVideoStream, FPS
# from camera import write

torch.backends.cudnn.deterministic = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')

- def letterbox_image(img, inp_dim):
-     '''resize image with unchanged aspect ratio using padding'''
-     img_w, img_h = img.shape[1], img.shape[0]
-     w, h = inp_dim
-     new_w = int(img_w * min(w / img_w, h / img_h))
-     new_h = int(img_h * min(w / img_w, h / img_h))
-     resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
-     canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
-     canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image
-
-     return canvas
-
- def load_classes(namesfile):
-     fp = open(namesfile, "r")
-     names = fp.read().split("\n")[:-1]
-     return names
-
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.
@@ -50,14 +35,56 @@ def prep_image(img, inp_dim):
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

+ labels = {}
+ b_boxes = {}
+ def write(bboxes, img, classes, colors):
+     """
+     Draws the bounding box in every frame over the objects that the model detects
+     """
+     class_idx = bboxes
+     bboxes = bboxes[1:5]
+     bboxes = bboxes.cpu().data.numpy()
+     bboxes = bboxes.astype(int)
+     b_boxes.update({"bbox": bboxes.tolist()})
+     # bboxes = bboxes + [150,100,200,200]  # personal choice; tweak this offset to make the distance as accurate as possible
+     bboxes = torch.from_numpy(bboxes)
+     cls = int(class_idx[-1])
+     label = "{0}".format(classes[cls])
+     labels.update({"Current Object": label})
+     color = random.choice(colors)
+
+     ## Text configuration for the overlay on the frame
+     text_str = '%s' % (label)
+     font_face = cv2.FONT_HERSHEY_DUPLEX
+     font_scale = 0.6
+     font_thickness = 1
+     text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
+     text_pt = (bboxes[0], bboxes[1] - 3)
+     text_color = [255, 255, 255]
+
+     ## Distance measurement for each bounding box
+     x, y, w, h = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
+     ## item() is used to retrieve the value from the tensor
+     distance = (2 * 3.14 * 180) / (w.item() + h.item() * 360) * 1000 + 3  ### Distance measured in inches
+     feedback = ("{}".format(labels["Current Object"]) + " " + "is" + " at {} ".format(round(distance)) + "Inches")
+     # # speak.Speak(feedback)  # If you are running this on a Linux-based OS, use espeak; this speech library adds unnecessary latency on Windows
+     print(feedback)
+
+     cv2.putText(img, str("{:.2f} Inches".format(distance)), (text_w + x, y), cv2.FONT_HERSHEY_DUPLEX, font_scale, (0, 255, 0), font_thickness, cv2.LINE_AA)
+     cv2.rectangle(img, (bboxes[0], bboxes[1]), (bboxes[2] + text_w - 30, bboxes[3]), color, 2)
+     cv2.putText(img, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

+     return img
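+
+ ## The distance formula above is an empirical heuristic. A minimal alternative sketch, assuming a
+ ## pre-measured focal length in pixels (focal_length_px) and a known real-world object width in
+ ## inches (known_width_in), would use the pinhole-camera / similar-triangles relation instead:
+ def estimate_distance_inches(box_width_px, known_width_in, focal_length_px):
+     """Similar-triangles estimate: distance = (real width * focal length) / width in pixels."""
+     return (known_width_in * focal_length_px) / float(box_width_px)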

class ObjectDetection:
    def __init__(self, id):
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
-         self.cfgfile = "cfg/yolov4.cfg"
-         self.weightsfile = "yolov4.weights"
+         self.cfgfile = "cfg/yolov3.cfg"
+         # self.cfgfile = 'cfg/yolov3-tiny.cfg'
+         self.weightsfile = "yolov3.weights"
+         # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 80
@@ -66,13 +93,16 @@ def __init__(self, id):
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
+         self.model.net_info["height"] = 160
+         self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280  # 640 # 1280
        self.height = 720  # 360 # 720
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
-
+         assert self.inp_dim % 32 == 0
+         assert self.inp_dim > 32
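+         # The asserts guard the chosen input resolution: the Darknet backbone downsamples by a factor
+         # of 32, so net_info["height"] must be a positive multiple of 32 (160 favours speed; 416 or 608
+         # are the usual, more accurate choices).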
        self.model.eval()

    def main(self):
@@ -89,35 +119,35 @@ def frame_render(queue_from_cam):
            frame = q.get()
            q.task_done()
            fps = FPS().start()
-
            try:
-                 img, orig_im, dim = prep_image(frame, 160)
-
+                 img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:  #### If you have a GPU properly installed, this will run on the GPU
                    im_dim = im_dim.cuda()
                    img = img.cuda()
                # with torch.no_grad():  #### Set the model in evaluation mode
+                 output = self.model(Variable(img), self.CUDA)
+                 output = write_results(output, self.confidence, self.num_classes, nms=True, nms_conf=self.nms_thesh)  #### Localize the objects in a frame
+                 output = output.type(torch.half)
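+                 # write_results applies the confidence threshold and non-maximum suppression; each
+                 # surviving row is (batch index, x1, y1, x2, y2, objectness, class score, class index),
+                 # which is why write() slices columns 1:5 for the box and reads the last column as the class.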

-                 output = self.model(img)
-                 from tool.utils import post_processing, plot_boxes_cv2
-                 bounding_boxes = post_processing(img, self.confidence, self.nms_thesh, output)
-                 frame = plot_boxes_cv2(frame, bounding_boxes[0], savename=None, class_names=self.classes, color=None, colors=self.colors)
-
+                 if list(output.size()) == [1, 86]:
+                     print(output.size())
+                     pass
+                 else:
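+                     # The boxes from write_results are in the resized network input's pixel space: clamp
+                     # to [0, inp_dim], normalize to [0, 1], then scale x (columns 1, 3) by the frame width
+                     # and y (columns 2, 4) by the frame height to map them back onto the original frame.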
+                     output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
+
+                     # im_dim = im_dim.repeat(output.size(0), 1)
+                     output[:, [1, 3]] *= frame.shape[1]
+                     output[:, [2, 4]] *= frame.shape[0]
+                     list(map(lambda boxes: write(boxes, frame, self.classes, self.colors), output))
+
            except:
                pass

            fps.update()
            fps.stop()
-
            ret, jpeg = cv2.imencode('.jpg', frame)
            print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
-             return jpeg.tostring()
-
-
-

- if __name__ == "__main__":
-     id = 0
-     ObjectDetection(id).main()
+             return jpeg.tostring()