
Commit b868f7f

Adding Files
0 parents  commit b868f7f

10 files changed: +1101 -0 lines changed

Hand Gesture Implementation.ipynb

+205
@@ -0,0 +1,205 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Alphabet Recognition Implementation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Libraries needed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque\n",
    "import numpy as np\n",
    "import cv2\n",
    "from keras.models import load_model\n",
    "import pyttsx3\n",
    "engine = pyttsx3.init()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Important variables used in the program"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = load_model('best_model.h5')  # loading the OCR model created earlier\n",
    "letters = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l',\n",
    "           12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w',\n",
    "           23: 'x', 24: 'y', 25: 'z', 26: ''}\n",
    "redLower = np.array([170, 100, 60])   # lower HSV bound for red\n",
    "redUpper = np.array([180, 255, 255])  # upper HSV bound for red; values can be read off a standard HSV colour chart\n",
    "\n",
    "kernel = np.ones((5, 5), np.uint8)\n",
    "\n",
    "# define the blackboard and alphabet canvases\n",
    "blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "alphabet = np.zeros((200, 200, 3), dtype=np.uint8)\n",
    "points = deque(maxlen=512)\n",
    "sounddict = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0,\n",
    "             'm': 0, 'n': 0, 'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0, 'v': 0, 'w': 0, 'x': 0,\n",
    "             'y': 0, 'z': 0}\n",
    "\n",
    "counter = 0\n",
    "cap = cv2.VideoCapture(0)  # camera object\n",
    "prediction = 26  # index 26 maps to the empty string until a letter is predicted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Camera loop (main steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "while True:\n",
    "    ret, frame = cap.read()\n",
    "    frame = cv2.flip(frame, 1)  # mirror the frame so the drawing follows the pen naturally\n",
    "    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)  # convert the frame to HSV\n",
    "    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert the frame to grayscale\n",
    "\n",
    "    # detect which pixels fall inside the red colour boundaries\n",
    "    red = cv2.inRange(hsv, redLower, redUpper)\n",
    "\n",
    "    # cv2.imshow(\"Initial InRange Image\", red)\n",
    "\n",
    "    # preprocess the inRange mask\n",
    "    red = cv2.erode(red, kernel)  # erosion\n",
    "    red = cv2.morphologyEx(red, cv2.MORPH_OPEN, kernel)  # opening\n",
    "    red = cv2.dilate(red, kernel)  # dilation\n",
    "\n",
    "    cv2.imshow(\"red\", red)\n",
    "\n",
    "    # find contours in the mask\n",
    "    cnts, _ = cv2.findContours(red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
    "\n",
    "    # debug\n",
    "    a = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)\n",
    "    cv2.drawContours(a, cnts, -1, (0, 255, 0), 3)\n",
    "    cv2.imshow(\"Drawing contour\", a)\n",
    "\n",
    "    center = None\n",
    "    # if any contours were found\n",
    "    if len(cnts) > 0:\n",
    "        cnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0]\n",
    "        ((x, y), radius) = cv2.minEnclosingCircle(cnt)\n",
    "        cv2.circle(frame, (int(x), int(y)), int(radius), (125, 255, 255), 2)\n",
    "\n",
    "        M = cv2.moments(cnt)\n",
    "        center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))\n",
    "        points.appendleft(center)\n",
    "    # if no contours were found, i.e. there is no red-coloured object in the frame\n",
    "    elif len(cnts) == 0:\n",
    "        if len(points) != 0:  # there are points in the deque and the pen has been removed\n",
    "            blackboard_gray = cv2.cvtColor(blackboard, cv2.COLOR_BGR2GRAY)\n",
    "            blur = cv2.medianBlur(blackboard_gray, 15)\n",
    "            blur = cv2.GaussianBlur(blur, (5, 5), 0)\n",
    "            thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
    "            cv2.imshow(\"Thresh\", thresh)\n",
    "            blackboard_cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]\n",
    "\n",
    "            # debug\n",
    "            bb = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)\n",
    "            cv2.drawContours(bb, blackboard_cnts, -1, (0, 255, 0), 3)\n",
    "            cv2.imshow(\"final_thresh_with_contour\", bb)\n",
    "\n",
    "            if len(blackboard_cnts) >= 1:\n",
    "                cnt = sorted(blackboard_cnts, key=cv2.contourArea, reverse=True)[0]  # sort the contours and keep the biggest one\n",
    "\n",
    "                if cv2.contourArea(cnt) > 1000:  # require an area above 1000 so noise is not picked up as a contour\n",
    "                    x, y, w, h = cv2.boundingRect(cnt)\n",
    "                    alphabet = blackboard_gray[y - 10:y + h + 10, x - 10:x + w + 10]\n",
    "                    try:\n",
    "                        img = cv2.resize(alphabet, (28, 28))\n",
    "                        cv2.imshow(\"alphabet\", alphabet)  # the selected alphabet image fed to the OCR model as input\n",
    "                    except cv2.error:\n",
    "                        points = deque(maxlen=512)\n",
    "                        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "                        continue\n",
    "\n",
    "                    img = np.array(img)\n",
    "                    img = img.astype('float32') / 255\n",
    "                    prediction = model.predict(img.reshape(1, 28, 28))[0]\n",
    "                    prediction = np.argmax(prediction)\n",
    "                    # try/except for text-to-speech\n",
    "                    try:\n",
    "                        engine.setProperty('rate', 138)  # set a new voice rate\n",
    "                        volume = engine.getProperty('volume')  # current volume level (min=0, max=1)\n",
    "                        engine.setProperty('volume', 0.5)\n",
    "                        engine.say(\"Our model predicted the alphabet as\")\n",
    "                        engine.setProperty('volume', 1.0)  # set the volume level between 0 and 1\n",
    "                        engine.say(str(letters[int(prediction)]))\n",
    "                        print(\"Our model predicted the alphabet as \" + str(letters[int(prediction)]))\n",
    "                        engine.runAndWait()\n",
    "                    except Exception:\n",
    "                        print(\"There was an error in text-to-speech\")\n",
    "\n",
    "            # empty the point deque and the blackboard\n",
    "            points = deque(maxlen=512)\n",
    "            blackboard = np.zeros((480, 640, 3), dtype=np.uint8)\n",
    "\n",
    "    # connect the detected points with lines\n",
    "    for i in range(1, len(points)):\n",
    "        if points[i - 1] is None or points[i] is None:\n",
    "            continue\n",
    "        cv2.line(frame, points[i - 1], points[i], (0, 0, 0), 2)\n",
    "        cv2.line(blackboard, points[i - 1], points[i], (255, 255, 255), 8)\n",
    "\n",
    "    cv2.putText(frame, \"Prediction: \" + str(letters[int(prediction)]), (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8,\n",
    "                (255, 255, 255), 2)\n",
    "    cv2.imshow(\"Alphabet Recognition System\", frame)\n",
    "    if cv2.waitKey(5) == 13:  # break when Enter is pressed\n",
    "        break\n",
    "cap.release()\n",
    "cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PyCharm (Alphabet Recognisation _Using_HandGestures)",
   "language": "python",
   "name": "pycharm-d299a7e7"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
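
A note on the colour mask above: OpenCV's hue channel runs from 0 to 180 and red sits at both ends of it, so the single 170-180 band in redLower/redUpper catches only half of the red hues. A minimal two-band sketch, reusing the hsv, redLower, and redUpper names from the notebook; the lower band's bounds here are an assumption, not part of this commit:

# Hypothetical two-band red mask: red wraps around the HSV hue axis (0-180),
# so combine the commit's upper band (170-180) with an assumed lower band (0-10).
lowerBand = cv2.inRange(hsv, np.array([0, 100, 60]), np.array([10, 255, 255]))
upperBand = cv2.inRange(hsv, redLower, redUpper)
red = cv2.bitwise_or(lowerBand, upperBand)  # pixels matching either band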

Hand Gesture Implementation.py

+146
@@ -0,0 +1,146 @@
# Author: Shivansh Joshi

# # Alphabet Recognition Implementation

# ### Libraries needed

from collections import deque
import numpy as np
import cv2
from keras.models import load_model
import pyttsx3

engine = pyttsx3.init()


# ### Important variables used in the program

model = load_model('best_model.h5')  # loading the OCR model created earlier
letters = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l',
           12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w',
           23: 'x', 24: 'y', 25: 'z', 26: ''}
redLower = np.array([170, 100, 60])   # lower HSV bound for red
redUpper = np.array([180, 255, 255])  # upper HSV bound for red; values can be read off a standard HSV colour chart

kernel = np.ones((5, 5), np.uint8)

# define the blackboard and alphabet canvases
blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
alphabet = np.zeros((200, 200, 3), dtype=np.uint8)
points = deque(maxlen=512)
sounddict = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0,
             'm': 0, 'n': 0, 'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0, 'v': 0, 'w': 0, 'x': 0,
             'y': 0, 'z': 0}

counter = 0
cap = cv2.VideoCapture(0)  # camera object
prediction = 26  # index 26 maps to the empty string until a letter is predicted


# ### Camera loop (main steps)

while True:
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)  # mirror the frame so the drawing follows the pen naturally
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)  # convert the frame to HSV
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert the frame to grayscale

    # detect which pixels fall inside the red colour boundaries
    red = cv2.inRange(hsv, redLower, redUpper)

    # cv2.imshow("Initial InRange Image", red)

    # preprocess the inRange mask
    red = cv2.erode(red, kernel)  # erosion
    red = cv2.morphologyEx(red, cv2.MORPH_OPEN, kernel)  # opening
    red = cv2.dilate(red, kernel)  # dilation

    cv2.imshow("red", red)

    # find contours in the mask
    cnts, _ = cv2.findContours(red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # debug
    a = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(a, cnts, -1, (0, 255, 0), 3)
    cv2.imshow("Drawing contour", a)

    center = None
    # if any contours were found
    if len(cnts) > 0:
        cnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
        ((x, y), radius) = cv2.minEnclosingCircle(cnt)
        cv2.circle(frame, (int(x), int(y)), int(radius), (125, 255, 255), 2)

        M = cv2.moments(cnt)
        center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
        points.appendleft(center)
    # if no contours were found, i.e. there is no red-coloured object in the frame
    elif len(cnts) == 0:
        if len(points) != 0:  # there are points in the deque and the pen has been removed
            blackboard_gray = cv2.cvtColor(blackboard, cv2.COLOR_BGR2GRAY)
            blur = cv2.medianBlur(blackboard_gray, 15)
            blur = cv2.GaussianBlur(blur, (5, 5), 0)
            thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
            cv2.imshow("Thresh", thresh)
            blackboard_cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]

            # debug
            bb = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)
            cv2.drawContours(bb, blackboard_cnts, -1, (0, 255, 0), 3)
            cv2.imshow("final_thresh_with_contour", bb)

            if len(blackboard_cnts) >= 1:
                cnt = sorted(blackboard_cnts, key=cv2.contourArea, reverse=True)[0]  # sort the contours and keep the biggest one

                if cv2.contourArea(cnt) > 1000:  # require an area above 1000 so noise is not picked up as a contour
                    x, y, w, h = cv2.boundingRect(cnt)
                    alphabet = blackboard_gray[y - 10:y + h + 10, x - 10:x + w + 10]
                    try:
                        img = cv2.resize(alphabet, (28, 28))
                        cv2.imshow("alphabet", alphabet)  # the selected alphabet image fed to the OCR model as input
                    except cv2.error:
                        points = deque(maxlen=512)
                        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
                        continue

                    img = np.array(img)
                    img = img.astype('float32') / 255
                    prediction = model.predict(img.reshape(1, 28, 28))[0]
                    prediction = np.argmax(prediction)
                    # try/except for text-to-speech
                    try:
                        engine.setProperty('rate', 138)  # set a new voice rate
                        volume = engine.getProperty('volume')  # current volume level (min=0, max=1)
                        engine.setProperty('volume', 0.5)
                        engine.say("Our model predicted the alphabet as")
                        engine.setProperty('volume', 1.0)  # set the volume level between 0 and 1
                        engine.say(str(letters[int(prediction)]))
                        print("Our model predicted the alphabet as " + str(letters[int(prediction)]))
                        engine.runAndWait()
                    except Exception:
                        print("There was an error in text-to-speech")

            # empty the point deque and the blackboard
            points = deque(maxlen=512)
            blackboard = np.zeros((480, 640, 3), dtype=np.uint8)

    # connect the detected points with lines
    for i in range(1, len(points)):
        if points[i - 1] is None or points[i] is None:
            continue
        cv2.line(frame, points[i - 1], points[i], (0, 0, 0), 2)
        cv2.line(blackboard, points[i - 1], points[i], (255, 255, 255), 8)

    cv2.putText(frame, "Prediction: " + str(letters[int(prediction)]), (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                (255, 255, 255), 2)
    cv2.imshow("Alphabet Recognition System", frame)
    if cv2.waitKey(5) == 13:  # break when Enter is pressed
        break

cap.release()
cv2.destroyAllWindows()
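
A note on the centroid step in the loop above: cv2.moments returns m00 == 0 for a degenerate zero-area contour, and the division would then raise ZeroDivisionError. A minimal guard, assuming the same cnt and points variables as the script:

# Hypothetical guard: only track the centroid when the contour has non-zero area.
M = cv2.moments(cnt)
if M['m00'] != 0:
    center = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
    points.appendleft(center)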
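The prediction call reshapes the crop to (1, 28, 28), which assumes best_model.h5 was trained on 28x28 inputs with no channel axis. If the saved model instead expects an explicit channel dimension, a hedged adaptation could look like this; the shape check is illustrative, not code from the commit:

# Keras models expose input_shape, e.g. (None, 28, 28) or (None, 28, 28, 1);
# add the trailing channel axis only when the model expects four dimensions.
shape = (1, 28, 28, 1) if len(model.input_shape) == 4 else (1, 28, 28)
prediction = np.argmax(model.predict(img.reshape(shape))[0])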

Loss and accuracy Graph/Figure_1.png

22.5 KB

Loss and accuracy Graph/Figure_2.png

25 KB
