
Commit 64e21fb

uploading socket-programming-project
1 parent 842f810 commit 64e21fb

13 files changed (+1214, -0 lines)

HandTrackingModule.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
import cv2
import mediapipe as mp
import time


class handDetector():
    '''
    The class for gesture detection and tracking.
    '''
    def __init__(self, mode=False, modelComplexity=1, maxHands=2, detectionCon=0.5, trackCon=0.5):
        '''
        The init function required for class instantiation.

        :param mode: (boolean, default=False) if False, the input is treated as a video stream.
        :param modelComplexity: (int, default=1) the complexity of the detector model.
        :param maxHands: (int, default=2) the maximum number of hands that can be detected.
        :param detectionCon: (float, default=0.5) confidence threshold for detection.
        :param trackCon: (float, default=0.5) confidence threshold for tracking.
        '''

        self.mode = mode
        self.modelComplexity = modelComplexity
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        # Create a hand-tracking detector object from the mediapipe module
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplexity,
                                        self.detectionCon, self.trackCon)
        # Create an object for drawing connections between different landmarks of the hand
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        '''
        Detect hands within a frame, with the option to draw connections between different landmarks.

        :param img: (array) the frame in which to detect hands.
        :param draw: (boolean, default=True) draw landmarks as small circles with connection lines between them.
        :return:
            img: (array) the frame with hands detected (if any) and the connections between landmarks drawn (if draw=True).
        '''

        # Convert the frame to RGB format
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Pass the frame to the hand detector
        self.results = self.hands.process(imgRGB)

        # Check if any hand was detected in the frame
        if self.results.multi_hand_landmarks:
            # Draw circles on each landmark and connection lines between them for the detected hand(s)
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        # Return the frame after detection and drawing
        return img

    def findPosition(self, img, handNo=0, draw=True):
        '''
        Return the coordinates of the detected hand landmarks.

        :param img: (array) the frame to get landmark coordinates from.
        :param handNo: (int, default=0) the id of the hand: 0 for the first hand detected, 1 for the second.
        :param draw: (boolean, default=True) draw a big circle on every landmark detected within the frame.
        :return:
            lmList: (list) list containing the id of every landmark detected and its pixel coordinates within the frame.
        '''
        # Create an empty list to hold every landmark detected in the selected hand
        lmList = []

        # Check if any hand was detected in the frame
        if self.results.multi_hand_landmarks:
            # Get the landmarks for the desired hand
            myHand = self.results.multi_hand_landmarks[handNo]
            # Iterate over the landmarks detected in the desired hand
            for id, lm in enumerate(myHand.landmark):
                # Get the shape of the frame
                h, w, c = img.shape
                # Get the pixel center of the landmark: lm.x is normalized by the frame width,
                # so multiply it back by the width; similarly for lm.y with the height.
                cx, cy = int(lm.x * w), int(lm.y * h)
                # Append the id of the landmark and its center
                lmList.append([id, cx, cy])
                # If True, draw big circles on each landmark detected
                if draw:
                    cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        return lmList


def main():
    # Test code
    pTime = 0
    cTime = 0
    # Get the video from the camera (index 1 here; use 0 if only one camera is attached)
    cap = cv2.VideoCapture(1)
    # Create the detector
    detector = handDetector()

    while True:
        # Get the frame from the camera
        success, img = cap.read()
        # Pass the frame to the detector to get back a frame with the hand(s) detected, if any were found
        img = detector.findHands(img)
        # Get the coordinates of the detected landmarks
        lmList = detector.findPosition(img)
        # if len(lmList) != 0:
        #     print(lmList[4])

        # Calculate fps
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        # Show the fps rate in the top-left corner of the live camera feed
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
                    (255, 0, 255), 3)

        # Show the camera feed with a small delay
        cv2.imshow("Image", img)
        cv2.waitKey(1)


# Execute main (test code) if this script is run directly.
if __name__ == "__main__":
    main()

README.md

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
# Realtime Multi-client Video Processing with Socket Programming using the TCP Protocol

<p align="center">
  <img src="images/multiclient-real-time-processing1.gif" width="800" height="600" />
</p>

Our application performs multi-client video processing using the server's hardware capabilities, with each connection handled in its own thread.

Each new client requests a type of processing and sends its source video; the server performs the processing in real time based on the client's `--process` argument. A minimal sketch of such a server loop is shown below.
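The commit's `server.py` is not shown in this excerpt, so the following is only a hedged sketch of a threaded server that matches `client.py`'s length-prefixed pickle framing; the handler name, the greeting message, and the Canny edge call are illustrative assumptions, not the actual server code:

```python
import socket, threading, pickle, struct
import cv2

HOST, PORT = '0.0.0.0', 9999

def handle_client(conn):
    # The first message from the client carries the process type: 'edge', 'gesture', or 'original'
    process_type = conn.recv(1024).decode('utf-8')
    conn.send(('server will apply: ' + process_type).encode('utf-8'))

    data = b""
    payload_size = struct.calcsize("Q")
    while True:
        # Read the 8-byte length prefix, then the pickled frame itself
        while len(data) < payload_size:
            packet = conn.recv(4 * 1024)
            if not packet:
                return
            data += packet
        msg_size = struct.unpack("Q", data[:payload_size])[0]
        data = data[payload_size:]
        while len(data) < msg_size:
            data += conn.recv(4 * 1024)
        frame = pickle.loads(data[:msg_size])
        data = data[msg_size:]

        if process_type == 'edge':
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = cv2.Canny(gray, 100, 200)  # example processing only
        # 'gesture' processing would call HandTrackingModule here

        # Echo the processed frame back with the same length-prefixed framing
        out = pickle.dumps(frame)
        conn.sendall(struct.pack("Q", len(out)) + out)

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind((HOST, PORT))
server.listen(5)
print('HOST IP:', socket.gethostbyname(socket.gethostname()))
while True:
    conn, addr = server.accept()
    # One thread per client keeps the connections independent
    threading.Thread(target=handle_client, args=(conn,), daemon=True).start()
```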
## Installations required
```
pip install comtypes
pip install pycaw
pip install imutils
pip install opencv-python
pip install mediapipe
```
(`pickle`, `socket`, and `struct` ship with Python's standard library, so they need no installation.)
## Use gesture control to change the volume of a computer

[1] First we look into hand tracking, and then we use the hand landmarks to recognize gestures that change the volume.

[2] The distance between the index finger and the thumb is used to set the volume level (see the sketch after the image below).

![image](images/1.png "Title")
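A hedged sketch of how that distance-to-volume mapping can be done with `HandTrackingModule` and pycaw (which controls the Windows master volume); the 20-200 pixel range and the 0.7 detection confidence are illustrative assumptions, not values taken from this commit:

```python
from math import hypot
from ctypes import cast, POINTER

import cv2
import numpy as np
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

from HandTrackingModule import handDetector

# Bind to the default Windows audio endpoint via pycaw
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
minVol, maxVol, _ = volume.GetVolumeRange()  # dB range, e.g. roughly -65.25 to 0.0

cap = cv2.VideoCapture(0)
detector = handDetector(detectionCon=0.7)

while True:
    success, img = cap.read()
    if not success:
        break
    img = detector.findHands(img)
    lmList = detector.findPosition(img, draw=False)
    if lmList:
        # Landmark 4 is the thumb tip, landmark 8 is the index-finger tip
        x1, y1 = lmList[4][1], lmList[4][2]
        x2, y2 = lmList[8][1], lmList[8][2]
        length = hypot(x2 - x1, y2 - y1)
        # Map the pixel distance (assumed 20-200 px) onto the volume range
        vol = np.interp(length, [20, 200], [minVol, maxVol])
        volume.SetMasterVolumeLevel(vol, None)
    cv2.imshow("Volume control", img)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
```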
## Applications in the medical field:

* In operating rooms.\
Doctors may soon be using a system in the operating room that recognizes hand gestures as commands telling a computer to browse and display medical images of the patient during surgery.

Surgeons routinely need to review medical images and records during surgery, but stepping away from the operating table to touch a keyboard and mouse can delay the procedure and increase the risk of spreading infection-causing bacteria.

One of the most ubiquitous pieces of equipment in surgical units is the computer workstation, which allows access to medical images before and during surgery. However, computers and their peripherals are difficult to sterilize, and keyboards and mice have been found to be a source of contamination. Also, when nurses or assistants operate the keyboard for the surgeon, conveying information accurately has proven cumbersome and inefficient, since spoken dialogue can be time-consuming and leads to frustration and delays in the surgery.

The Gestix hand-gesture interface responds to the surgeon's gesture commands in real time without requiring the surgeon to attach a microphone, use head-mounted (body-contact) sensing devices, or operate foot pedals to control the display system.\

![image](images/2.png "Title")

* Post-stroke rehabilitation.\
Some researchers have developed a hand-gesture recognition algorithm devoted to monitoring seven gestures for residential rehabilitation of post-stroke patients.

* Sign language.
## How to set up

1. To run the server:
```
> python .\server.py
```
Make sure that the IP configured in the client is the same as the IP printed by the server (see the sketch after this list for one way a server can print it).
2. To run the client and use gesture detection:
```
> python .\client.py --camera 'enable' --process 'gesture'
```
You can add multiple clients connected to the same server.\
Press q to exit the client application.

3. To apply edge processing to a video:
```
> python .\client.py --video '.\videos\recording.mp4' --process 'edge'
```

4. To receive the original video from the server without processing:
```
> python .\client.py --video '.\videos\recording.mp4' --process 'original'
```
Here `--video '.\videos\recording.mp4'` and `--process 'original'` are optional, as they are the parser defaults.
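Since `server.py` is not shown in this excerpt, the following is only an assumption about how a server might print its LAN IP for clients to copy into `host`:

```python
import socket

# Resolve this machine's hostname to a LAN IP; clients connect to this address.
host_ip = socket.gethostbyname(socket.gethostname())
print('HOST IP:', host_ip)
```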
# Additional feature (NOT REQUIRED)
## Sign language (hand-tracking transfer-learning model, not finished yet)

> There was an error running the transfer learning over the SSD model, step [6] in `model.ipynb`; maybe it exceeded the available RAM.

We uploaded only the notebooks for convenience, as the TensorFlow models, labeled images, and SSD weights exceed 300 MB.

## Example of image labelling
<p align="center">
  <img src="images/3.png" width="300" />
  <img src="images/4.png" width="300"/>
</p>

## Error message
<p align="center">
  <img src="images/5.png" width="500" />
</p>

client.py

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
import socket, cv2, pickle, struct
import imutils
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--camera")  # argument to enable/disable the webcam: --camera 'enable'
# Make a videos folder and point the parser at it (change the default in the next line)
parser.add_argument("--video", default='videos/recording.mp4')  # argument --video 'videos/lcdp.mp4' --process 'type'
parser.add_argument("--process", default='original', choices=["edge", "gesture", "original"])  # choose the type of processing to be done on the server
args = parser.parse_args()


# Getting the source video for the client.
wCam, hCam = 688, 480

camera = str(args.camera)  # pass 'enable' if you want to use the webcam
if camera == 'enable':
    vid = cv2.VideoCapture(0)
else:
    vid = cv2.VideoCapture(str(args.video))

vid.set(3, wCam)  # property 3: frame width
vid.set(4, hCam)  # property 4: frame height
# End of getting the source video

# Initializing the connection to the host
host = '62.114.34.87'  # don't forget to change this IP to yours (get the IP from the server output)
port = 9999
size = 1024
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

client_socket.connect((host, port))

# Send the process type as a string in the first message of each client connection;
# the server threads use it to know which type of processing to apply.
client_socket.send(str(args.process).encode('utf-8'))

data = client_socket.recv(size)
if len(data):
    print('Received:', data.decode('utf-8'))
# End of initialization


if client_socket:
    while vid.isOpened():
        try:
            success, frame = vid.read()
            frame = imutils.resize(frame, width=380, height=250)
            a = pickle.dumps(frame)
            # Length-prefix each pickled frame with an 8-byte unsigned size ("Q")
            message = struct.pack("Q", len(a)) + a
            client_socket.sendall(message)

            data = b""
            payload_size = struct.calcsize("Q")
            while True:
                # Read the 8-byte length prefix of the processed frame
                while len(data) < payload_size:
                    packet = client_socket.recv(4*1024)
                    if not packet: break
                    data += packet
                packed_msg_size = data[:payload_size]
                data = data[payload_size:]
                msg_size = struct.unpack("Q", packed_msg_size)[0]
                # Keep receiving until the whole pickled frame has arrived
                while len(data) < msg_size:
                    data += client_socket.recv(4*1024)
                frame_data = data[:msg_size]
                data = data[msg_size:]
                frame = pickle.loads(frame_data)
                ###
                # extra client-side processing could go here
                # frame = cv2.putText(frame, text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                ###
                cv2.imshow("Received on Client", frame)
                # Grab and send the next frame
                success, frame = vid.read()
                frame = imutils.resize(frame, width=380, height=250)
                a = pickle.dumps(frame)
                message = struct.pack("Q", len(a)) + a
                client_socket.sendall(message)

                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    client_socket.close()
                    raise SystemExit  # exit cleanly after closing the socket
        except Exception:
            print('VIDEO FINISHED')
            break

images/1.png (75.6 KB)

images/2.png (12.7 KB)

images/3.png (919 KB)

images/4.png (928 KB)

images/5.png (130 KB)

16.7 MB
