
Commit 64e21fb

uploading socket-programming-project
1 parent 842f810 commit 64e21fb

13 files changed (+1214, -0 lines)

HandTrackingModule.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
import cv2
import mediapipe as mp
import time


class handDetector():
    '''
    The class for gesture detection and tracking.
    '''
    def __init__(self, mode=False, modelComplexity=1, maxHands=2, detectionCon=0.5, trackCon=0.5):
        '''
        The init function required for class instantiation.

        :param mode: (boolean, default=False) if False, the input is treated as a video stream.
        :param modelComplexity: (int, default=1) the complexity of the detector model.
        :param maxHands: (int, default=2) the maximum number of hands that can be detected.
        :param detectionCon: (float, default=0.5) confidence threshold for detection.
        :param trackCon: (float, default=0.5) confidence threshold for tracking.
        '''

        self.mode = mode
        self.modelComplexity = modelComplexity
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        # Create a hand-tracking detector object from the mediapipe module
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplexity,
                                        self.detectionCon, self.trackCon)
        # Create an object for drawing connections between different landmarks of the hand
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        '''
        Detect hands within a frame, with the option to draw connections between different landmarks.

        :param img: (array) the frame in which to detect hands.
        :param draw: (boolean, default=True) draw landmarks as small circles with connection lines between them.
        :return:
            img: (array) the frame with hands detected (if any) and the connections between landmarks drawn (if draw=True).
        '''

        # Convert the frame to RGB format
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Pass the frame to the hand detector
        self.results = self.hands.process(imgRGB)

        # Check if any hand was detected in the frame
        if self.results.multi_hand_landmarks:
            # Draw circles on each landmark and connection lines between them for the detected hand(s)
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        # Return the frame after detection and drawing
        return img

    def findPosition(self, img, handNo=0, draw=True):
        '''
        Return the coordinates of the detected hand landmarks.

        :param img: (array) the frame to get landmark coordinates from.
        :param handNo: (int, default=0) the id of the hand: 0 for the first hand detected, 1 for the second.
        :param draw: (boolean, default=True) draw a big circle on every landmark detected within the frame.
        :return:
            lmList: (list) list containing the id of every landmark detected and its pixel coordinates within the frame.
        '''
        # Create an empty list to hold every landmark detected in the selected hand
        lmList = []

        # Check if any hand was detected in the frame
        if self.results.multi_hand_landmarks:
            # Get the landmarks for the desired hand
            myHand = self.results.multi_hand_landmarks[handNo]
            # Iterate over the landmarks detected in the desired hand
            for id, lm in enumerate(myHand.landmark):
                # Get the shape of the frame
                h, w, c = img.shape
                # Get the pixel center of the landmark: lm.x is normalized by the frame width,
                # so multiply it back by the width; similarly for lm.y with the height.
                cx, cy = int(lm.x * w), int(lm.y * h)
                # Append the id of the landmark and its center
                lmList.append([id, cx, cy])
                # If True, draw big circles on each landmark detected
                if draw:
                    cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        return lmList


def main():
    # Test code
    pTime = 0
    cTime = 0
    # Get the video from the camera (index 1 here; use 0 if only one camera is attached)
    cap = cv2.VideoCapture(1)
    # Create the detector
    detector = handDetector()

    while True:
        # Get the frame from the camera
        success, img = cap.read()
        # Pass the frame to the detector to get back a frame with the hand(s) detected, if any were found
        img = detector.findHands(img)
        # Get the coordinates of the detected landmarks
        lmList = detector.findPosition(img)
        # if len(lmList) != 0:
        #     print(lmList[4])

        # Calculate fps
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        # Show the fps rate in the top-left corner of the live camera feed
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
                    (255, 0, 255), 3)

        # Show the camera feed with a small delay
        cv2.imshow("Image", img)
        cv2.waitKey(1)


# Execute main (test code) if this script is run directly.
if __name__ == "__main__":
    main()

README.md

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
# Realtime Multi-client Video Processing with Socket Programming using the TCP Protocol

<p align="center">
  <img src="images/multiclient-real-time-processing1.gif" width="800" height="600" />
</p>

Our application performs multi-client video processing using the server's hardware capabilities, with each connection handled in its own thread.

Each new client requests a type of processing and sends its source video; the server performs the processing in real time based on the client's `--process` argument. A minimal sketch of such a server loop is shown below.
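The commit's `server.py` is not shown in this excerpt, so the following is only a hedged sketch of a threaded server that matches `client.py`'s length-prefixed pickle framing; the handler name, the greeting message, and the Canny edge call are illustrative assumptions, not the actual server code:

```python
import socket, threading, pickle, struct
import cv2

HOST, PORT = '0.0.0.0', 9999

def handle_client(conn):
    # The first message from the client carries the process type: 'edge', 'gesture', or 'original'
    process_type = conn.recv(1024).decode('utf-8')
    conn.send(('server will apply: ' + process_type).encode('utf-8'))

    data = b""
    payload_size = struct.calcsize("Q")
    while True:
        # Read the 8-byte length prefix, then the pickled frame itself
        while len(data) < payload_size:
            packet = conn.recv(4 * 1024)
            if not packet:
                return
            data += packet
        msg_size = struct.unpack("Q", data[:payload_size])[0]
        data = data[payload_size:]
        while len(data) < msg_size:
            data += conn.recv(4 * 1024)
        frame = pickle.loads(data[:msg_size])
        data = data[msg_size:]

        if process_type == 'edge':
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = cv2.Canny(gray, 100, 200)  # example processing only
        # 'gesture' processing would call HandTrackingModule here

        # Echo the processed frame back with the same length-prefixed framing
        out = pickle.dumps(frame)
        conn.sendall(struct.pack("Q", len(out)) + out)

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind((HOST, PORT))
server.listen(5)
print('HOST IP:', socket.gethostbyname(socket.gethostname()))
while True:
    conn, addr = server.accept()
    # One thread per client keeps the connections independent
    threading.Thread(target=handle_client, args=(conn,), daemon=True).start()
```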
## Installations required
```
pip install comtypes
pip install pycaw
pip install imutils
pip install opencv-python
pip install mediapipe
```
(`pickle`, `socket`, and `struct` ship with Python's standard library, so they need no installation.)
## Use gesture control to change the volume of a computer

[1] First we look into hand tracking, and then we use the hand landmarks to recognize gestures that change the volume.

[2] The distance between the index finger and the thumb is used to set the volume level (see the sketch after the image below).

![image](images/1.png "Title")
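A hedged sketch of how that distance-to-volume mapping can be done with `HandTrackingModule` and pycaw (which controls the Windows master volume); the 20-200 pixel range and the 0.7 detection confidence are illustrative assumptions, not values taken from this commit:

```python
from math import hypot
from ctypes import cast, POINTER

import cv2
import numpy as np
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

from HandTrackingModule import handDetector

# Bind to the default Windows audio endpoint via pycaw
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
minVol, maxVol, _ = volume.GetVolumeRange()  # dB range, e.g. roughly -65.25 to 0.0

cap = cv2.VideoCapture(0)
detector = handDetector(detectionCon=0.7)

while True:
    success, img = cap.read()
    if not success:
        break
    img = detector.findHands(img)
    lmList = detector.findPosition(img, draw=False)
    if lmList:
        # Landmark 4 is the thumb tip, landmark 8 is the index-finger tip
        x1, y1 = lmList[4][1], lmList[4][2]
        x2, y2 = lmList[8][1], lmList[8][2]
        length = hypot(x2 - x1, y2 - y1)
        # Map the pixel distance (assumed 20-200 px) onto the volume range
        vol = np.interp(length, [20, 200], [minVol, maxVol])
        volume.SetMasterVolumeLevel(vol, None)
    cv2.imshow("Volume control", img)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
```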
## Applications in the medical field:

* In operating rooms.\
Doctors may soon be using a system in the operating room that recognizes hand gestures as commands telling a computer to browse and display medical images of the patient during surgery.

Surgeons routinely need to review medical images and records during surgery, but stepping away from the operating table to touch a keyboard and mouse can delay the procedure and increase the risk of spreading infection-causing bacteria.

One of the most ubiquitous pieces of equipment in surgical units is the computer workstation, which allows access to medical images before and during surgery. However, computers and their peripherals are difficult to sterilize, and keyboards and mice have been found to be a source of contamination. Also, when nurses or assistants operate the keyboard for the surgeon, conveying information accurately has proven cumbersome and inefficient, since spoken dialogue can be time-consuming and leads to frustration and delays in the surgery.

The Gestix hand-gesture interface responds to the surgeon's gesture commands in real time without requiring the surgeon to attach a microphone, use head-mounted (body-contact) sensing devices, or operate foot pedals to control the display system.\

![image](images/2.png "Title")

* Post-stroke rehabilitation.\
Some researchers have developed a hand-gesture recognition algorithm devoted to monitoring seven gestures for residential rehabilitation of post-stroke patients.

* Sign language.
## How to set up

1. To run the server:
```
> python .\server.py
```
Make sure that the IP configured in the client is the same as the IP printed by the server (see the sketch after this list for one way a server can print it).
2. To run the client and use gesture detection:
```
> python .\client.py --camera 'enable' --process 'gesture'
```
You can add multiple clients connected to the same server.\
Press q to exit the client application.

3. To apply edge processing to a video:
```
> python .\client.py --video '.\videos\recording.mp4' --process 'edge'
```

4. To receive the original video from the server without processing:
```
> python .\client.py --video '.\videos\recording.mp4' --process 'original'
```
Here `--video '.\videos\recording.mp4'` and `--process 'original'` are optional, as they are the parser defaults.
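Since `server.py` is not shown in this excerpt, the following is only an assumption about how a server might print its LAN IP for clients to copy into `host`:

```python
import socket

# Resolve this machine's hostname to a LAN IP; clients connect to this address.
host_ip = socket.gethostbyname(socket.gethostname())
print('HOST IP:', host_ip)
```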
# Additional feature (NOT REQUIRED)
## Sign language (hand-tracking transfer-learning model, not finished yet)

> There was an error running the transfer learning over the SSD model, step [6] in `model.ipynb`; maybe it exceeded the available RAM.

We uploaded only the notebooks for convenience, as the TensorFlow models, labeled images, and SSD weights exceed 300 MB.

## Example of image labelling
<p align="center">
  <img src="images/3.png" width="300" />
  <img src="images/4.png" width="300"/>
</p>

## Error message
<p align="center">
  <img src="images/5.png" width="500" />
</p>

client.py

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
import socket, cv2, pickle, struct
import imutils
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--camera")  # argument to enable/disable the webcam: --camera 'enable'
# Make a videos folder and point the parser at it (change the default in the next line)
parser.add_argument("--video", default='videos/recording.mp4')  # argument --video 'videos/lcdp.mp4' --process 'type'
parser.add_argument("--process", default='original', choices=["edge", "gesture", "original"])  # choose the type of processing to be done on the server
args = parser.parse_args()


# Getting the source video for the client.
wCam, hCam = 688, 480

camera = str(args.camera)  # pass 'enable' if you want to use the webcam
if camera == 'enable':
    vid = cv2.VideoCapture(0)
else:
    vid = cv2.VideoCapture(str(args.video))

vid.set(3, wCam)  # property 3: frame width
vid.set(4, hCam)  # property 4: frame height
# End of getting the source video

# Initializing the connection to the host
host = '62.114.34.87'  # don't forget to change this IP to yours (get the IP from the server output)
port = 9999
size = 1024
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

client_socket.connect((host, port))

# Send the process type as a string in the first message of each client connection;
# the server threads use it to know which type of processing to apply.
client_socket.send(str(args.process).encode('utf-8'))

data = client_socket.recv(size)
if len(data):
    print('Received:', data.decode('utf-8'))
# End of initialization


if client_socket:
    while vid.isOpened():
        try:
            success, frame = vid.read()
            frame = imutils.resize(frame, width=380, height=250)
            a = pickle.dumps(frame)
            # Length-prefix each pickled frame with an 8-byte unsigned size ("Q")
            message = struct.pack("Q", len(a)) + a
            client_socket.sendall(message)

            data = b""
            payload_size = struct.calcsize("Q")
            while True:
                # Read the 8-byte length prefix of the processed frame
                while len(data) < payload_size:
                    packet = client_socket.recv(4*1024)
                    if not packet: break
                    data += packet
                packed_msg_size = data[:payload_size]
                data = data[payload_size:]
                msg_size = struct.unpack("Q", packed_msg_size)[0]
                # Keep receiving until the whole pickled frame has arrived
                while len(data) < msg_size:
                    data += client_socket.recv(4*1024)
                frame_data = data[:msg_size]
                data = data[msg_size:]
                frame = pickle.loads(frame_data)
                ###
                # extra client-side processing could go here
                # frame = cv2.putText(frame, text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                ###
                cv2.imshow("Received on Client", frame)
                # Grab and send the next frame
                success, frame = vid.read()
                frame = imutils.resize(frame, width=380, height=250)
                a = pickle.dumps(frame)
                message = struct.pack("Q", len(a)) + a
                client_socket.sendall(message)

                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    client_socket.close()
                    raise SystemExit  # exit cleanly after closing the socket
        except Exception:
            print('VIDEO FINISHED')
            break

images/1.png (75.6 KB)

images/2.png (12.7 KB)

images/3.png (919 KB)

images/4.png (928 KB)

images/5.png (130 KB)

16.7 MB
