## newmain.py
import cv2
import mediapipe as mp
import numpy as np
from joblib import load
from sklearn.preprocessing import Normalizer
from pynput.keyboard import Key, Controller as KeyboardController
import pyautogui
import time
## Open a capture on the default webcam (device index 0)
capture = cv2.VideoCapture(0)
## Initialize the MediaPipe Hands detector, tracking at most one hand
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils
## Load trained model and initialize a normalizer
model = load("model.joblib")
normalizer = Normalizer()
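## The model is assumed to be a scikit-learn classifier trained on the same
## 42-value feature rows built below (21 landmarks x 2 coordinates, L2-normalized).
## A minimal training sketch under that assumption, using a hypothetical
## "landmarks.csv" with one labeled landmark row per gesture sample:
##
##   import pandas as pd
##   from sklearn.svm import SVC
##   from joblib import dump
##   df = pd.read_csv("landmarks.csv")
##   X = Normalizer().transform(df.drop(columns="label").values)
##   dump(SVC().fit(X, df["label"]), "model.joblib")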
## Define variables for output video
h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
size = (w, h)
## Create a VideoWriter for the annotated output (MJPG codec, 24 fps, input frame size)
outputVid = cv2.VideoWriter("result.avi", cv2.VideoWriter_fourcc('M','J','P','G'), 24, size, isColor=True)
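## Note: the 24 fps above is a fixed assumption; if the webcam delivers frames at a
## different rate, result.avi will play back faster or slower than real time.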
## Create a keyboard controller
keyboard = KeyboardController()
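## Helper that factors out the hotkey pattern used repeatedly below:
## press the given keys in order, hold briefly, then release in reverse order.
def pressKeys(*keys):
    for key in keys:
        keyboard.press(key)
    time.sleep(0.1)
    for key in reversed(keys):
        keyboard.release(key)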
## Helper function to create a padded bounding box around a hand.
## Takes the video frame img and the hand landmarks lm.
def createBoundingBox(img, lm):
    width, height = img.shape[1], img.shape[0]
    ## Convert each normalized landmark to an (x, y) pixel coordinate,
    ## clamped so it never falls outside the frame
    points = []
    for landmark in lm.landmark:
        lm_x = min(int(landmark.x * width), width - 1)
        lm_y = min(int(landmark.y * height), height - 1)
        points.append((lm_x, lm_y))
    ## boundingRect gives the tight box (x, y, w, h) around all points
    ## (int32 is the integer type cv2.boundingRect expects)
    x, y, w, h = cv2.boundingRect(np.array(points, dtype=np.int32))
    ## Pad the box so it comfortably encapsulates the whole hand
    x_min = x - 20
    y_min = y - 15
    x_max = x + w + 20
    y_max = y + h + 15
    return [x_min, y_min, x_max, y_max]
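## For example, a hand whose tight landmark box is (x=100, y=80, w=50, h=60)
## yields [80, 65, 170, 155]: the tight box padded by 20 px horizontally
## and 15 px vertically.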
## Track the most recent action so it can be drawn on the frame
action_text = ""
## While the capture is open
while capture.isOpened():
    ## Read the next frame from the capture
    read, frame = capture.read()
    ## If the frame was read successfully
    if read:
        ## Mirror the frame so gestures match the on-screen direction
        frame = cv2.flip(frame, 1)
        ## Convert the frame to RGB, the color order MediaPipe expects
        rgbFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        ## Process the frame to get hand landmarks
        results = hands.process(rgbFrame)
        ## If any hands were detected
        if results.multi_hand_landmarks:
            ## For each detected hand
            for handLms in results.multi_hand_landmarks:
                ## Get the bounding box coordinates for the hand
                boundingBox = createBoundingBox(frame, handLms)
                ## Draw a rectangle around the bounding box
                cv2.rectangle(frame, (boundingBox[0], boundingBox[1]), (boundingBox[2], boundingBox[3]), (0, 255, 0), 2)
                ## Draw the connections between landmarks for better visualization
                mp_drawing.draw_landmarks(frame, handLms, mpHands.HAND_CONNECTIONS)
                ## Flatten the landmark x and y coordinates into one feature row, then normalize it
                coords = list(np.array([[landmark.x, landmark.y] for landmark in handLms.landmark]).flatten())
                coords = normalizer.transform([coords])
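                ## Normalizer (default norm="l2") rescales the row to unit length,
                ## e.g. [3.0, 4.0] becomes [0.6, 0.8]; this assumes the model was
                ## trained on identically normalized vectors.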
                ## Predict which letter is being gestured using the trained model
                predicted_letter = model.predict(coords)
                ## Write the predicted letter above the bounding box
                cv2.putText(frame, str(predicted_letter[0]), (boundingBox[0], boundingBox[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                ## Perform an action based on the predicted letter
                letter = str(predicted_letter[0]).upper()
                if letter in ('A', 'M'):
                    ## Alt+Right arrow: rotate right
                    pressKeys(Key.alt, Key.right)
                    action_text = "Right Rotation"
                elif letter == 'B':
                    ## Alt+Left arrow: rotate left
                    pressKeys(Key.alt, Key.left)
                    action_text = "Left Rotation"
                elif letter == 'C':
                    ## Alt+Up arrow: rotate up
                    pressKeys(Key.alt, Key.up)
                    action_text = "Up Rotation"
                elif letter == 'D':
                    ## Alt+Down arrow: rotate down
                    pressKeys(Key.alt, Key.down)
                    action_text = "Down Rotation"
                elif letter == 'R':
                    ## Tap the Home key
                    pressKeys(Key.home)
                    action_text = "Home Button Pressed"
                elif letter in ('V', 'K'):
                    ## Horizontal scroll, mapped here to zooming in
                    pyautogui.hscroll(10)
                    action_text = "Zoom In"
                elif letter == 'Y':
                    ## Horizontal scroll, mapped here to zooming out
                    pyautogui.hscroll(-10)
                    action_text = "Zoom Out"
                ## Show the most recent action below the bounding box
                if action_text:
                    cv2.putText(frame, action_text, (boundingBox[0], boundingBox[3] + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
        ## Show the annotated frame
        cv2.imshow("Frame", frame)
        ## Write the annotated frame to the VideoWriter instance outputVid
        outputVid.write(frame)
        ## Quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        ## Stop once no more frames can be read
        break
## Release the capture and writer, and close all windows
capture.release()
outputVid.release()
cv2.destroyAllWindows()