
Commit b868f7f

Adding Files
0 parents  commit b868f7f

10 files changed: +1101 -0 lines changed

Hand Gesture Implementation.ipynb

+205
@@ -0,0 +1,205 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Alphabet Recognition Implementation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Libraries needed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque\n",
    "import numpy as np\n",
    "import cv2\n",
    "from keras.models import load_model\n",
    "import pyttsx3\n",
    "engine = pyttsx3.init()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Important variables used in the program"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = load_model('best_model.h5')  # loading the OCR model created earlier\n",
    "letters = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l',\n",
    "           12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w',\n",
    "           23: 'x', 24: 'y', 25: 'z', 26: ''}\n",
    "redLower = np.array([170, 100, 60])   # lower HSV bound for red\n",
    "redUpper = np.array([180, 255, 255])  # upper HSV bound for red; values can be read off a standard HSV colour chart\n",
    "\n",
    "kernel = np.ones((5, 5), np.uint8)\n",
    "\n",
    "# define the blackboard and alphabet canvases\n",
    "blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "alphabet = np.zeros((200, 200, 3), dtype=np.uint8)\n",
    "points = deque(maxlen=512)\n",
    "sounddict = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0,\n",
    "             'm': 0, 'n': 0, 'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0, 'v': 0, 'w': 0, 'x': 0,\n",
    "             'y': 0, 'z': 0}\n",
    "\n",
    "counter = 0\n",
    "cap = cv2.VideoCapture(0)  # camera object\n",
    "prediction = 26  # index 26 maps to the empty string until a letter is predicted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Camera loop (main steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "while True:\n",
    "    ret, frame = cap.read()\n",
    "    frame = cv2.flip(frame, 1)  # mirror the frame so the drawing follows the pen naturally\n",
    "    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)  # convert the frame to HSV\n",
    "    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert the frame to grayscale\n",
    "\n",
    "    # detect which pixels fall inside the red colour boundaries\n",
    "    red = cv2.inRange(hsv, redLower, redUpper)\n",
    "\n",
    "    # cv2.imshow(\"Initial InRange Image\", red)\n",
    "\n",
    "    # preprocess the inRange mask\n",
    "    red = cv2.erode(red, kernel)  # erosion\n",
    "    red = cv2.morphologyEx(red, cv2.MORPH_OPEN, kernel)  # opening\n",
    "    red = cv2.dilate(red, kernel)  # dilation\n",
    "\n",
    "    cv2.imshow(\"red\", red)\n",
    "\n",
    "    # find contours in the mask\n",
    "    cnts, _ = cv2.findContours(red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
    "\n",
    "    # debug\n",
    "    a = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)\n",
    "    cv2.drawContours(a, cnts, -1, (0, 255, 0), 3)\n",
    "    cv2.imshow(\"Drawing contour\", a)\n",
    "\n",
    "    center = None\n",
    "    # if any contours were found\n",
    "    if len(cnts) > 0:\n",
    "        cnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0]\n",
    "        ((x, y), radius) = cv2.minEnclosingCircle(cnt)\n",
    "        cv2.circle(frame, (int(x), int(y)), int(radius), (125, 255, 255), 2)\n",
    "\n",
    "        M = cv2.moments(cnt)\n",
    "        center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))\n",
    "        points.appendleft(center)\n",
    "    # if no contours were found, i.e. there is no red-coloured object in the frame\n",
    "    elif len(cnts) == 0:\n",
    "        if len(points) != 0:  # there are points in the deque and the pen has been removed\n",
    "            blackboard_gray = cv2.cvtColor(blackboard, cv2.COLOR_BGR2GRAY)\n",
    "            blur = cv2.medianBlur(blackboard_gray, 15)\n",
    "            blur = cv2.GaussianBlur(blur, (5, 5), 0)\n",
    "            thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
    "            cv2.imshow(\"Thresh\", thresh)\n",
    "            blackboard_cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]\n",
    "\n",
    "            # debug\n",
    "            bb = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)\n",
    "            cv2.drawContours(bb, blackboard_cnts, -1, (0, 255, 0), 3)\n",
    "            cv2.imshow(\"final_thresh_with_contour\", bb)\n",
    "\n",
    "            if len(blackboard_cnts) >= 1:\n",
    "                cnt = sorted(blackboard_cnts, key=cv2.contourArea, reverse=True)[0]  # sort the contours and keep the biggest one\n",
    "\n",
    "                if cv2.contourArea(cnt) > 1000:  # require an area above 1000 so noise is not picked up as a contour\n",
    "                    x, y, w, h = cv2.boundingRect(cnt)\n",
    "                    alphabet = blackboard_gray[y - 10:y + h + 10, x - 10:x + w + 10]\n",
    "                    try:\n",
    "                        img = cv2.resize(alphabet, (28, 28))\n",
    "                        cv2.imshow(\"alphabet\", alphabet)  # the selected alphabet image fed to the OCR model as input\n",
    "                    except cv2.error:\n",
    "                        points = deque(maxlen=512)\n",
    "                        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "                        continue\n",
    "\n",
    "                    img = np.array(img)\n",
    "                    img = img.astype('float32') / 255\n",
    "                    prediction = model.predict(img.reshape(1, 28, 28))[0]\n",
    "                    prediction = np.argmax(prediction)\n",
    "                    # try/except for text-to-speech\n",
    "                    try:\n",
    "                        engine.setProperty('rate', 138)  # set a new voice rate\n",
    "                        volume = engine.getProperty('volume')  # current volume level (min=0, max=1)\n",
    "                        engine.setProperty('volume', 0.5)\n",
    "                        engine.say(\"Our model predicted the alphabet as\")\n",
    "                        engine.setProperty('volume', 1.0)  # set the volume level between 0 and 1\n",
    "                        engine.say(str(letters[int(prediction)]))\n",
    "                        print(\"Our model predicted the alphabet as \" + str(letters[int(prediction)]))\n",
    "                        engine.runAndWait()\n",
    "                    except Exception:\n",
    "                        print(\"There was an error in text-to-speech\")\n",
    "\n",
    "            # empty the point deque and the blackboard\n",
    "            points = deque(maxlen=512)\n",
    "            blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "\n",
    "    # connect the detected points with lines\n",
    "    for i in range(1, len(points)):\n",
    "        if points[i - 1] is None or points[i] is None:\n",
    "            continue\n",
    "        cv2.line(frame, points[i - 1], points[i], (0, 0, 0), 2)\n",
    "        cv2.line(blackboard, points[i - 1], points[i], (255, 255, 255), 8)\n",
    "\n",
    "    cv2.putText(frame, \"Prediction: \" + str(letters[int(prediction)]), (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8,\n",
    "                (255, 255, 255), 2)\n",
    "    cv2.imshow(\"Alphabet Recognition System\", frame)\n",
    "    if cv2.waitKey(5) == 13:  # break when Enter is pressed\n",
    "        break\n",
    "cap.release()\n",
    "cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PyCharm (Alphabet Recognisation _Using_HandGestures)",
   "language": "python",
   "name": "pycharm-d299a7e7"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
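
A note on the colour mask above: OpenCV's hue channel runs from 0 to 180 and red sits at both ends of it, so the single 170-180 band in redLower/redUpper catches only half of the red hues. A minimal two-band sketch, reusing the hsv, redLower, and redUpper names from the notebook; the lower band's bounds here are an assumption, not part of this commit:

# Hypothetical two-band red mask: red wraps around the HSV hue axis (0-180),
# so combine the commit's upper band (170-180) with an assumed lower band (0-10).
lowerBand = cv2.inRange(hsv, np.array([0, 100, 60]), np.array([10, 255, 255]))
upperBand = cv2.inRange(hsv, redLower, redUpper)
red = cv2.bitwise_or(lowerBand, upperBand)  # pixels matching either band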

Hand Gesture Implementation.py

+146
@@ -0,0 +1,146 @@
# Author: Shivansh Joshi

# # Alphabet Recognition Implementation

# ### Libraries needed

from collections import deque
import numpy as np
import cv2
from keras.models import load_model
import pyttsx3

engine = pyttsx3.init()


# ### Important variables used in the program

model = load_model('best_model.h5')  # loading the OCR model created earlier
letters = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l',
           12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w',
           23: 'x', 24: 'y', 25: 'z', 26: ''}
redLower = np.array([170, 100, 60])   # lower HSV bound for red
redUpper = np.array([180, 255, 255])  # upper HSV bound for red; values can be read off a standard HSV colour chart

kernel = np.ones((5, 5), np.uint8)

# define the blackboard and alphabet canvases
blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
alphabet = np.zeros((200, 200, 3), dtype=np.uint8)
points = deque(maxlen=512)
sounddict = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0,
             'm': 0, 'n': 0, 'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0, 'v': 0, 'w': 0, 'x': 0,
             'y': 0, 'z': 0}

counter = 0
cap = cv2.VideoCapture(0)  # camera object
prediction = 26  # index 26 maps to the empty string until a letter is predicted


# ### Camera loop (main steps)

while True:
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)  # mirror the frame so the drawing follows the pen naturally
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)  # convert the frame to HSV
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert the frame to grayscale

    # detect which pixels fall inside the red colour boundaries
    red = cv2.inRange(hsv, redLower, redUpper)

    # cv2.imshow("Initial InRange Image", red)

    # preprocess the inRange mask
    red = cv2.erode(red, kernel)  # erosion
    red = cv2.morphologyEx(red, cv2.MORPH_OPEN, kernel)  # opening
    red = cv2.dilate(red, kernel)  # dilation

    cv2.imshow("red", red)

    # find contours in the mask
    cnts, _ = cv2.findContours(red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # debug
    a = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(a, cnts, -1, (0, 255, 0), 3)
    cv2.imshow("Drawing contour", a)

    center = None
    # if any contours were found
    if len(cnts) > 0:
        cnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
        ((x, y), radius) = cv2.minEnclosingCircle(cnt)
        cv2.circle(frame, (int(x), int(y)), int(radius), (125, 255, 255), 2)

        M = cv2.moments(cnt)
        center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
        points.appendleft(center)
    # if no contours were found, i.e. there is no red-coloured object in the frame
    elif len(cnts) == 0:
        if len(points) != 0:  # there are points in the deque and the pen has been removed
            blackboard_gray = cv2.cvtColor(blackboard, cv2.COLOR_BGR2GRAY)
            blur = cv2.medianBlur(blackboard_gray, 15)
            blur = cv2.GaussianBlur(blur, (5, 5), 0)
            thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
            cv2.imshow("Thresh", thresh)
            blackboard_cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]

            # debug
            bb = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)
            cv2.drawContours(bb, blackboard_cnts, -1, (0, 255, 0), 3)
            cv2.imshow("final_thresh_with_contour", bb)

            if len(blackboard_cnts) >= 1:
                cnt = sorted(blackboard_cnts, key=cv2.contourArea, reverse=True)[0]  # sort the contours and keep the biggest one

                if cv2.contourArea(cnt) > 1000:  # require an area above 1000 so noise is not picked up as a contour
                    x, y, w, h = cv2.boundingRect(cnt)
                    alphabet = blackboard_gray[y - 10:y + h + 10, x - 10:x + w + 10]
                    try:
                        img = cv2.resize(alphabet, (28, 28))
                        cv2.imshow("alphabet", alphabet)  # the selected alphabet image fed to the OCR model as input
                    except cv2.error:
                        points = deque(maxlen=512)
                        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
                        continue

                    img = np.array(img)
                    img = img.astype('float32') / 255
                    prediction = model.predict(img.reshape(1, 28, 28))[0]
                    prediction = np.argmax(prediction)
                    # try/except for text-to-speech
                    try:
                        engine.setProperty('rate', 138)  # set a new voice rate
                        volume = engine.getProperty('volume')  # current volume level (min=0, max=1)
                        engine.setProperty('volume', 0.5)
                        engine.say("Our model predicted the alphabet as")
                        engine.setProperty('volume', 1.0)  # set the volume level between 0 and 1
                        engine.say(str(letters[int(prediction)]))
                        print("Our model predicted the alphabet as " + str(letters[int(prediction)]))
                        engine.runAndWait()
                    except Exception:
                        print("There was an error in text-to-speech")

            # empty the point deque and the blackboard
            points = deque(maxlen=512)
            blackboard = np.zeros((480, 640, 3), dtype=np.uint8)

    # connect the detected points with lines
    for i in range(1, len(points)):
        if points[i - 1] is None or points[i] is None:
            continue
        cv2.line(frame, points[i - 1], points[i], (0, 0, 0), 2)
        cv2.line(blackboard, points[i - 1], points[i], (255, 255, 255), 8)

    cv2.putText(frame, "Prediction: " + str(letters[int(prediction)]), (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                (255, 255, 255), 2)
    cv2.imshow("Alphabet Recognition System", frame)
    if cv2.waitKey(5) == 13:  # break when Enter is pressed
        break

cap.release()
cv2.destroyAllWindows()
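
A note on the centroid step in the loop above: cv2.moments returns m00 == 0 for a degenerate zero-area contour, and the division would then raise ZeroDivisionError. A minimal guard, assuming the same cnt and points variables as the script:

# Hypothetical guard: only track the centroid when the contour has non-zero area.
M = cv2.moments(cnt)
if M['m00'] != 0:
    center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
    points.appendleft(center)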
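The prediction call reshapes the crop to (1, 28, 28), which assumes best_model.h5 was trained on 28x28 inputs with no channel axis. If the saved model instead expects an explicit channel dimension, a hedged adaptation could look like this; the shape check is illustrative, not code from the commit:

# Keras models expose input_shape, e.g. (None, 28, 28) or (None, 28, 28, 1);
# add the trailing channel axis only when the model expects four dimensions.
shape = (1, 28, 28, 1) if len(model.input_shape) == 4 else (1, 28, 28)
prediction = np.argmax(model.predict(img.reshape(shape))[0])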

Loss and accuracy Graph/Figure_1.png

22.5 KB

Loss and accuracy Graph/Figure_2.png

25 KB
