-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhandDetection.py
128 lines (88 loc) · 3.58 KB
/
handDetection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
## Import necessary libraries
import cv2
import mediapipe as mp
import numpy as np
from joblib import load
from sklearn.preprocessing import Normalizer
## Open the default camera (device index 0) as the video source
capture = cv2.VideoCapture(0)
## Fail fast when the camera is unavailable: otherwise the frame size
## queried below would presumably be reported as 0x0 and the output
## writer would be created for an unusable size
if not capture.isOpened():
    raise SystemExit("Could not open video capture device 0")
## Initialize mediapipe hand detection, tracking at most one hand
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils
## Load trained model and initialize a normalizer
## NOTE: joblib.load unpickles the file, so only load a model.joblib
## that comes from a trusted source
model = load("model.joblib")
normalizer = Normalizer()
## Read the capture's frame dimensions for the output video
h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
size = (w, h)
## Create VideoWriter instance (MJPG codec, fixed 24 fps, color)
## using the frame size taken from the input
outputVid = cv2.VideoWriter("result.avi", cv2.VideoWriter_fourcc('M','J','P','G'), 24, size, isColor = True)
## Helper function to create a bounding box around each hand.
## Takes in video frame img and hand landmarks lm
def createBoundingBox(img, lm):
    """Return a padded pixel bounding box around one hand's landmarks.

    Args:
        img: Video frame; only ``img.shape[0]`` (height) and
            ``img.shape[1]`` (width) are read.
        lm: Hand landmarks object whose ``landmark`` attribute is an
            iterable of points with ``x`` and ``y`` attributes. The
            coordinates are multiplied by the frame width/height, i.e.
            they are treated as fractions of the frame size.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as Python ints: the tight box
        around all landmark pixels, grown by 20 px horizontally and
        15 px vertically. The result is not clamped to the frame, so it
        may extend past the image borders.
    """
    height, width = img.shape[0], img.shape[1]
    ## Convert every landmark to pixel coordinates, capping each one at
    ## the last valid column / row of the frame
    xs = [min(int(point.x * width), width - 1) for point in lm.landmark]
    ys = [min(int(point.y * height), height - 1) for point in lm.landmark]
    ## Without landmarks there is nothing to enclose; fall back to an
    ## empty rectangle at the origin (x = y = w = h = 0) plus padding
    if not xs:
        return [-20, -15, 20, 15]
    ## Tight box from the coordinate extrema. The extent counts both end
    ## pixels (max - min + 1), the same convention cv2.boundingRect uses
    ## for integer point sets, without depending on the integer dtype
    ## that cv2.boundingRect accepts
    x, y = min(xs), min(ys)
    w = max(xs) - x + 1
    h = max(ys) - y + 1
    ## Define positions for bounding box to encapsulate hand
    x_min = x - 20
    y_min = y - 15
    x_max = x + w + 20
    y_max = y + h + 15
    return [x_min, y_min, x_max, y_max]
## Process frames until the capture closes, a read fails, or 'q' is
## pressed. The try/finally makes sure the capture, the writer and the
## window are released even when an error interrupts the loop.
try:
    while capture.isOpened():
        ## Read the frame from capture
        read, frame = capture.read()
        ## Stop before touching the frame when nothing was read; the
        ## frame is not expected to be a usable image in that case
        if not read:
            break
        ## Mirror the frame horizontally
        frame = cv2.flip(frame, 1)
        ## Convert frame to RGB for proper mediapipe detection
        rgbFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        ## Process each frame to get hand landmarks
        results = hands.process(rgbFrame)
        ## If at least one hand was detected
        if results.multi_hand_landmarks:
            ## For each hand detected
            for handLms in results.multi_hand_landmarks:
                ## Call createBoundingBox() to get bounding box coordinates
                boundingBox = createBoundingBox(frame, handLms)
                ## Draw a rectangle around each processed bounding box
                cv2.rectangle(frame, (boundingBox[0], boundingBox[1]), (boundingBox[2], boundingBox[3]), (0, 255, 0), 2)
                ## Draw the connections between landmarks for better visualization
                mp_drawing.draw_landmarks(frame, handLms, mpHands.HAND_CONNECTIONS)
                ## Flatten the landmarks' x and y coordinates into a single
                ## sample row and normalize it
                coords = np.array([[landmark.x, landmark.y] for landmark in handLms.landmark]).reshape(1, -1)
                coords = normalizer.transform(coords)
                ## Predict which letter is being gestured using the trained model
                predicted_letter = model.predict(coords)
                ## Write the predicted letter at the bounding box's top-left corner
                cv2.putText(frame, str(predicted_letter[0]), (boundingBox[0], boundingBox[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow("Frame", frame)
        ## Write frame with detection results to VideoWriter
        ## instance outputVid
        outputVid.write(frame)
        ## Quit when the user presses 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    capture.release()
    outputVid.release()
    cv2.destroyAllWindows()