import argparse
import threading, queue
from torch.multiprocessing import Pool, Process, set_start_method
+ from util import write_results, load_classes
+ from preprocess import letterbox_image
from darknet import Darknet
from imutils.video import WebcamVideoStream, FPS
# from camera import write

torch.backends.cudnn.deterministic = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')

- def letterbox_image(img, inp_dim):
-     '''resize image with unchanged aspect ratio using padding'''
-     img_w, img_h = img.shape[1], img.shape[0]
-     w, h = inp_dim
-     new_w = int(img_w * min(w / img_w, h / img_h))
-     new_h = int(img_h * min(w / img_w, h / img_h))
-     resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
-     canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
-     canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image
-
-     return canvas
-
- def load_classes(namesfile):
-     fp = open(namesfile, "r")
-     names = fp.read().split("\n")[:-1]
-     return names
-
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.
@@ -50,14 +35,56 @@ def prep_image(img, inp_dim):
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

+ labels = {}
+ b_boxes = {}
+ def write(bboxes, img, classes, colors):
+     """
+     Draws the bounding box in every frame over the objects that the model detects
+     """
+     class_idx = bboxes
+     bboxes = bboxes[1:5]
+     bboxes = bboxes.cpu().data.numpy()
+     bboxes = bboxes.astype(int)
+     b_boxes.update({"bbox": bboxes.tolist()})
+     # bboxes = bboxes + [150,100,200,200]  # personal choice; tweak this offset to make the distance as accurate as possible
+     bboxes = torch.from_numpy(bboxes)
+     cls = int(class_idx[-1])
+     label = "{0}".format(classes[cls])
+     labels.update({"Current Object": label})
+     color = random.choice(colors)
+
+     ## Text configuration for the overlay on the frame
+     text_str = '%s' % (label)
+     font_face = cv2.FONT_HERSHEY_DUPLEX
+     font_scale = 0.6
+     font_thickness = 1
+     text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
+     text_pt = (bboxes[0], bboxes[1] - 3)
+     text_color = [255, 255, 255]
+
+     ## Distance measurement for each bounding box
+     x, y, w, h = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
+     ## item() is used to retrieve the value from the tensor
+     distance = (2 * 3.14 * 180) / (w.item() + h.item() * 360) * 1000 + 3  ### Distance measured in inches
+     feedback = ("{}".format(labels["Current Object"]) + " " + "is" + " at {} ".format(round(distance)) + "Inches")
+     # # speak.Speak(feedback)  # If you are running this on a Linux-based OS, use espeak; this speech library adds unnecessary latency on Windows
+     print(feedback)
+
+     cv2.putText(img, str("{:.2f} Inches".format(distance)), (text_w + x, y), cv2.FONT_HERSHEY_DUPLEX, font_scale, (0, 255, 0), font_thickness, cv2.LINE_AA)
+     cv2.rectangle(img, (bboxes[0], bboxes[1]), (bboxes[2] + text_w - 30, bboxes[3]), color, 2)
+     cv2.putText(img, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

+     return img
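+
+ ## The distance formula above is an empirical heuristic. A minimal alternative sketch, assuming a
+ ## pre-measured focal length in pixels (focal_length_px) and a known real-world object width in
+ ## inches (known_width_in), would use the pinhole-camera / similar-triangles relation instead:
+ def estimate_distance_inches(box_width_px, known_width_in, focal_length_px):
+     """Similar-triangles estimate: distance = (real width * focal length) / width in pixels."""
+     return (known_width_in * focal_length_px) / float(box_width_px)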

class ObjectDetection:
    def __init__(self, id):
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
-         self.cfgfile = "cfg/yolov4.cfg"
-         self.weightsfile = "yolov4.weights"
+         self.cfgfile = "cfg/yolov3.cfg"
+         # self.cfgfile = 'cfg/yolov3-tiny.cfg'
+         self.weightsfile = "yolov3.weights"
+         # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 80
@@ -66,13 +93,16 @@ def __init__(self, id):
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
+         self.model.net_info["height"] = 160
+         self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280  # 640 # 1280
        self.height = 720  # 360 # 720
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
-
+         assert self.inp_dim % 32 == 0
+         assert self.inp_dim > 32
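+         # The asserts guard the chosen input resolution: the Darknet backbone downsamples by a factor
+         # of 32, so net_info["height"] must be a positive multiple of 32 (160 favours speed; 416 or 608
+         # are the usual, more accurate choices).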
        self.model.eval()

    def main(self):
@@ -89,35 +119,35 @@ def frame_render(queue_from_cam):
            frame = q.get()
            q.task_done()
            fps = FPS().start()
-
            try:
-                 img, orig_im, dim = prep_image(frame, 160)
-
+                 img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:  #### If you have a GPU properly installed, this will run on the GPU
                    im_dim = im_dim.cuda()
                    img = img.cuda()
                # with torch.no_grad():  #### Set the model in evaluation mode
+                 output = self.model(Variable(img), self.CUDA)
+                 output = write_results(output, self.confidence, self.num_classes, nms=True, nms_conf=self.nms_thesh)  #### Localize the objects in a frame
+                 output = output.type(torch.half)
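+                 # write_results applies the confidence threshold and non-maximum suppression; each
+                 # surviving row is (batch index, x1, y1, x2, y2, objectness, class score, class index),
+                 # which is why write() slices columns 1:5 for the box and reads the last column as the class.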

-                 output = self.model(img)
-                 from tool.utils import post_processing, plot_boxes_cv2
-                 bounding_boxes = post_processing(img, self.confidence, self.nms_thesh, output)
-                 frame = plot_boxes_cv2(frame, bounding_boxes[0], savename=None, class_names=self.classes, color=None, colors=self.colors)
-
+                 if list(output.size()) == [1, 86]:
+                     print(output.size())
+                     pass
+                 else:
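+                     # The boxes from write_results are in the resized network input's pixel space: clamp
+                     # to [0, inp_dim], normalize to [0, 1], then scale x (columns 1, 3) by the frame width
+                     # and y (columns 2, 4) by the frame height to map them back onto the original frame.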
+                     output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
+
+                     # im_dim = im_dim.repeat(output.size(0), 1)
+                     output[:, [1, 3]] *= frame.shape[1]
+                     output[:, [2, 4]] *= frame.shape[0]
+                     list(map(lambda boxes: write(boxes, frame, self.classes, self.colors), output))
+
            except:
                pass

            fps.update()
            fps.stop()
-
            ret, jpeg = cv2.imencode('.jpg', frame)
            print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
-             return jpeg.tostring()
-
-
-

- if __name__ == "__main__":
-     id = 0
-     ObjectDetection(id).main()
+             return jpeg.tostring()