6
6
import argparse
7
7
import threading , queue
8
8
from torch .multiprocessing import Pool , Process , set_start_method
9
- from util import write_results , load_classes
10
- from preprocess import letterbox_image
11
9
from darknet import Darknet
12
10
from imutils .video import WebcamVideoStream ,FPS
13
11
# from camera import write
23
21
# Ask cuDNN for deterministic algorithms.
torch.backends.cudnn.deterministic = True
# Make CUDA float tensors the default only when a GPU is actually usable;
# requesting the CUDA tensor type without one fails at import time.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
25
23
24
def letterbox_image(img, inp_dim):
    """Resize ``img`` to fit ``inp_dim`` with unchanged aspect ratio, using padding.

    The image is scaled by the largest factor that keeps it inside the target
    size and is then centred on a canvas filled with the value 128.

    Args:
        img: Image array indexed as ``(height, width, channels)``. The canvas
            has 3 channels, so a 3-channel image is expected.
        inp_dim: ``(width, height)`` of the output canvas.

    Returns:
        Array of shape ``(height, width, 3)`` with the same dtype as ``img``.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    w, h = inp_dim

    # One scale factor for both axes keeps the aspect ratio intact.
    scale = min(w / img_w, h / img_h)
    # int() truncates; never let a side collapse to 0 pixels.
    new_w = max(1, int(img_w * scale))
    new_h = max(1, int(img_h * scale))
    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Match the input dtype instead of NumPy's default integer type, so the
    # padded result is still a normal image array.
    canvas = np.full((h, w, 3), 128, dtype=img.dtype)
    top = (h - new_h) // 2
    left = (w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized_image

    return canvas
35
+
36
def load_classes(namesfile):
    """Read class names from ``namesfile``, one name per line.

    Args:
        namesfile: Path of a text file holding one class name per line.

    Returns:
        List of the lines in file order, without line terminators. The last
        name is kept whether or not the file ends with a newline.
    """
    # The context manager closes the handle even if reading fails.
    with open(namesfile, "r") as fp:
        return fp.read().splitlines()
26
40
27
41
def prep_image (img , inp_dim ):
28
42
"""
29
43
Prepare image for inputting to the neural network.
30
-
31
44
Returns a Variable
32
45
"""
33
46
orig_im = img
@@ -37,115 +50,74 @@ def prep_image(img, inp_dim):
37
50
img_ = torch .from_numpy (img_ ).float ().div (255.0 ).unsqueeze (0 )
38
51
return img_ , orig_im , dim
39
52
40
- labels = {}
41
- b_boxes = {}
42
- def write (bboxes , img , classes , colors ):
43
- """
44
- Draws the bounding box in every frame over the objects that the model detects
45
- """
46
- class_idx = bboxes
47
- bboxes = bboxes [1 :5 ]
48
- bboxes = bboxes .cpu ().data .numpy ()
49
- bboxes = bboxes .astype (int )
50
- b_boxes .update ({"bbox" :bboxes .tolist ()})
51
- # bboxes = bboxes + [150,100,200,200] # personal choice you can modify this to get distance as accurate as possible
52
- bboxes = torch .from_numpy (bboxes )
53
- cls = int (class_idx [- 1 ])
54
- label = "{0}" .format (classes [cls ])
55
- labels .update ({"Current Object" :label })
56
- color = random .choice (colors )
57
-
58
- ## Put text configuration on frame
59
- text_str = '%s' % (label )
60
- font_face = cv2 .FONT_HERSHEY_DUPLEX
61
- font_scale = 0.6
62
- font_thickness = 1
63
- text_w , text_h = cv2 .getTextSize (text_str , font_face , font_scale , font_thickness )[0 ]
64
- text_pt = (bboxes [0 ], bboxes [1 ] - 3 )
65
- text_color = [255 , 255 , 255 ]
66
53
67
-
68
- ## Distance Meaasurement for each bounding box
69
- x , y , w , h = bboxes [0 ], bboxes [1 ], bboxes [2 ], bboxes [3 ]
70
- ## item() is used to retrieve the value from the tensor
71
- distance = (2 * 3.14 * 180 ) / (w .item ()+ h .item () * 360 ) * 1000 + 3 ### Distance measuring in Inch
72
- feedback = ("{}" .format (labels ["Current Object" ])+ " " + "is" + " at {} " .format (round (distance ))+ "Inches" )
73
- # # speak.Speak(feedback) # If you are running this on linux based OS kindly use espeak. Using this speaking library in winodws will add unnecessary latency
74
- print (feedback )
75
-
76
- cv2 .putText (img , str ("{:.2f} Inches" .format (distance )), (text_w + x ,y ), cv2 .FONT_HERSHEY_DUPLEX , font_scale , (0 ,255 ,0 ), font_thickness , cv2 .LINE_AA )
77
- cv2 .rectangle (img , (bboxes [0 ],bboxes [1 ]),(bboxes [2 ] + text_w - 30 ,bboxes [3 ]), color , 2 )
78
- cv2 .putText (img , text_str , text_pt , font_face , font_scale , text_color , font_thickness , cv2 .LINE_AA )
79
-
80
- return img
81
54
82
55
class ObjectDetection:
    """Run object detection on frames from a video stream.

    The constructor opens the stream and loads a Darknet model from
    ``cfg/yolov4.cfg`` / ``yolov4.weights``. Each call to :meth:`main`
    processes one frame and returns it as JPEG bytes.
    """

    def __init__(self, id):
        """Open the video stream and load the network.

        Args:
            id: Source handed to ``WebcamVideoStream(src=...)``. The name
                shadows the built-in ``id`` but is kept so existing callers
                that pass it by keyword keep working.
        """
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
        self.cfgfile = "cfg/yolov4.cfg"
        self.weightsfile = "yolov4.weights"
        self.confidence = 0.6  # confidence threshold passed to post_processing
        self.nms_thesh = 0.8   # NMS threshold passed to post_processing
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        # NOTE(review): pickle executes arbitrary code on load; "pallete" must
        # be a trusted local file.
        with open("pallete", "rb") as palette_file:
            self.colors = pkl.load(palette_file)
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.inp_dim = 160  # size handed to prep_image for every frame
        self.width = 1280
        self.height = 720
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")

        self.model.eval()

    def main(self):
        """Process one frame and return it JPEG-encoded.

        Reads a frame, resizes it to ``(self.width, self.height)``, runs the
        model and draws the resulting boxes. If detection fails, the error is
        printed and the frame is returned without boxes.

        Returns:
            bytes: The JPEG-encoded frame.

        Raises:
            RuntimeError: If no frame could be read or JPEG encoding fails.
        """
        # Read the frame directly. The earlier code did this in a thread that
        # was joined immediately, so nothing ran concurrently, and a failed
        # read left the caller blocked forever on an empty queue.
        frame = self.cap.read()
        if frame is None:
            raise RuntimeError("could not read a frame from the video stream")
        frame = cv2.resize(frame, (self.width, self.height))

        fps = FPS().start()
        try:
            # Project-local helpers; imported here so that a missing module
            # still degrades to returning the frame without boxes.
            from tool.utils import post_processing, plot_boxes_cv2

            img, _orig_im, _dim = prep_image(frame, self.inp_dim)
            if self.CUDA:
                img = img.cuda()
            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                output = self.model(img)
            bounding_boxes = post_processing(
                img, self.confidence, self.nms_thesh, output)
            frame = plot_boxes_cv2(
                frame, bounding_boxes[0], savename=None,
                class_names=self.classes, color=None, colors=self.colors)
        except Exception as exc:
            # Best effort: report the problem and serve the frame unannotated.
            print("[WARN] detection failed: {}".format(exc))

        fps.update()
        fps.stop()

        ok, jpeg = cv2.imencode('.jpg', frame)
        print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
        if not ok:
            raise RuntimeError("could not encode the frame as JPEG")
        # tobytes() returns the same data as the deprecated tostring().
        return jpeg.tobytes()
119
+
151
120
121
if __name__ == "__main__":
    # Use source 0 and process a single frame when run as a script.
    # Named camera_id so the built-in id() is not shadowed at module level.
    camera_id = 0
    ObjectDetection(camera_id).main()
0 commit comments