### Chapter 5
- Facial Recognition
## Approach and Motivation
The course is foundational for anyone who wishes to work with computer vision in Python. It covers some of the most common image processing routines and has in-depth coverage of the mathematical concepts present in the materials:

- Math-first approach
- Tons of sample Python scripts (.py)
- 45+ Python scripts from chapters 1 to 4 for plug-and-play experiments
- Multimedia (image illustrations, video explanations, quizzes)
- 57 image assets from chapters 1 to 4 for practical illustrations
- 4 PDFs and 4 HTML files, one for each chapter
- Practical tips on real-world applications
The course's **only dependency** is `OpenCV`. Getting started is as easy as `pip install opencv-contrib-python` and you're set to go.
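To confirm the installation worked, a minimal sanity check (assuming a standard Python environment; not part of the course scripts) is to import the bindings and print the version:

```py
# Quick sanity check: the OpenCV bindings import correctly
# and report the installed version
import cv2

print(cv2.__version__)
```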
##### Question: What about deep learning libraries?
No; while using deep learning for images makes for interesting topics, it is probably better suited as an altogether separate course series. This course series focuses on the **essentials of computer vision** and, for pedagogical reasons, tries not to be overly ambitious with the scope it intends to cover.
The full code solution is in `morphological_02.py`.
As we read our image in grayscale mode (`flags=0`), we obtain a white background and a mostly-black foreground. This is illustrated in the subplot titled "Original" above. We begin our preprocessing steps by first binarizing the image (step 1), followed by inverting the colors (step 2) to get a white-on-black image.
An erosion operation is then performed (step 3). This works by creating our kernel (either through `numpy` or through `opencv`'s structuring element) and sliding that kernel across our image to remove white noise. The kernel shape (for example, `cv2.MORPH_RECT`) is fed as the first argument into `cv2.getStructuringElement()`, with the second being the kernel size (`ksize`) itself. The third argument is the _anchor point_, which defaults to the center.
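Putting steps 1 to 3 together, a minimal sketch could look like the following. This is not the course's `morphological_02.py`; the file name `digits.png` and the threshold value `127` are assumptions for illustration only.

```py
import cv2

# Read the image in grayscale mode (flags=0): white background, dark foreground
img = cv2.imread("digits.png", flags=0)  # hypothetical file name

# Step 1: binarize with a fixed threshold (127 is an assumed value)
_, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# Step 2: invert the colors to obtain a white-on-black image
inverted = cv2.bitwise_not(binary)

# Step 3: erode with a 3x3 rectangular kernel; the shape is the first
# argument to getStructuringElement, ksize the second, and the anchor
# defaults to the center
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
eroded = cv2.erode(inverted, kernel, iterations=1)
```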
### Opening and Closing
Another name for **Erosion followed by Dilation** is Opening. It is useful for removing noise in our image. The reverse of Opening is Closing, where we **perform Dilation followed by Erosion**; it is particularly suited for closing small holes inside foreground objects.
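Both operations are available in a single call through `cv2.morphologyEx`. A minimal sketch, assuming a white-on-black binary image such as the `inverted` image from the erosion sketch above:

```py
import cv2

# Assumed input: a white-on-black binary image
inverted = cv2.imread("inverted.png", flags=0)  # hypothetical placeholder

kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

# Opening (erosion then dilation): removes small white noise
opened = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, kernel)

# Closing (dilation then erosion): fills small holes inside foreground objects
closed = cv2.morphologyEx(inverted, cv2.MORPH_CLOSE, kernel)
```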
If you are paying close attention to the digit '0' in our LCD display, you will notice that its segments do not fully connect, leaving small gaps in the character.
A reasonable strategy to handle this is the Dilation or Closing (Dilation followed by Erosion) operation that you've learned earlier.
Similarly, your ROI may necessitate other pre-processing, and the specific tactical solution varies greatly depending on the problem set at hand.
As I inspected the bounding box we retrieved around the LCD screen, I observed that these bounding boxes often have their digits centered around the bottom half of the display. This led me to insert an additional step prior to the morphological transformation in the final code solution. The step uses numpy subsetting to trim away the top 20% as well as 20% on each side of the image:
```py
import cv2

# Read the ROI in grayscale mode (flags=0)
roi = cv2.imread("roi.png", flags=0)

# The margin is proportional to 20% of the ROI's height
RATIO = roi.shape[0] * 0.2

# Trim away the top 20% and the same margin on the left and right
trimmed = roi[
    int(RATIO):,
    int(RATIO): roi.shape[1] - int(RATIO)]
```
That said, whenever possible, be cautious not to hand-tune your solution in a way that is overly specific to the images you have at hand, lest the solution **only** work on those specific images and not others, a phenomenon fondly termed "overfitting" in the machine learning community.
I re-executed the solution code against some sample image sets, once with the trimming in place and once without, before settling on the decision. As you will see later, the trimming improves our accuracy and is a relatively safe strategy, given that every LCD screen, regardless of the issuer (bank), has the same asymmetry with more blank space in the top half compared to the bottom half.
#### Contour Properties
Furthermore, in many cases of digit recognition / digit classification you will want to predict the class for each digit in an ordered fashion. Suppose the LCD screen contains the digits "40710382": our algorithm should correctly isolate these digits and classify them iteratively, but do so from the leftmost digit to the rightmost. Failing to account for this may result in your algorithm correctly classifying each digit, yet producing an unreasonable output such as "17403082".
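A straightforward way to impose this ordering is to sort the contours by the x coordinate of their bounding boxes. A minimal sketch, assuming the OpenCV 4.x return signature of `cv2.findContours` and a preprocessed white-on-black image named `thresh` (both assumptions, not the course's exact variable names):

```py
import cv2

thresh = cv2.imread("thresh.png", flags=0)  # hypothetical preprocessed image

# Find the external contours of the digits
# (OpenCV 4.x returns (contours, hierarchy); 3.x prepends the image)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Sort left to right by the x coordinate of each contour's bounding box
contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])
```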
- Step 3: Noise reduction and trim away asymmetrical white space in our ROI
- Step 4: Binarize our image using adaptive thresholding
- Step 5: Morphological transformation to remove noise and fill the small holes in our digits
- Step 6: Find contours in our image with a height greater than 20px
- Step 7: Sort the contours in-place, using the x value of their coordinates (hence, left to right)
- Step 8 (see the sketch after this list)
  - Step 8a: Create a rectangular bounding box for each digit, and some convenience units that we later use to slice the seven segments. Notice that these convenience units are not hard-coded values, but are proportional to the height (`h`) of our rectangular box
  - Step 8b: Slice the seven segments; the first segment ("A") spans from point (0, 0) to (w, `int(h * 0.15)`), i.e. it is `w` wide and 15% the height of the full digit contour
  - Step 8c: Initialize the state to `0` for each of the 7 segments, then conditionally set regions with more white than black pixels to `1`
  - Step 8d: Once all 7 states have been set, perform a lookup against the digit dictionary created in step 1; append the value to the `digits` list created at the beginning of step 8
- Step 9: Draw a rectangle and add the predicted text for each bounding box. Finally, use a print statement to print the `digits` list.
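To make steps 8a to 8d more concrete, here is a minimal sketch of the per-digit loop. It is not the course's final solution: the variable names (`thresh`, `contours`), the width-based unit `dW`, the center-band unit `dHC`, and the exact `DIGITS_LOOKUP` table are assumptions for illustration; the course solution builds its own digit dictionary in step 1.

```py
import cv2

# Assumed seven-segment lookup table, ordered as
# (top, top-left, top-right, center, bottom-left, bottom-right, bottom)
DIGITS_LOOKUP = {
    (1, 1, 1, 0, 1, 1, 1): 0,
    (0, 0, 1, 0, 0, 1, 0): 1,
    (1, 0, 1, 1, 1, 0, 1): 2,
    (1, 0, 1, 1, 0, 1, 1): 3,
    (0, 1, 1, 1, 0, 1, 0): 4,
    (1, 1, 0, 1, 0, 1, 1): 5,
    (1, 1, 0, 1, 1, 1, 1): 6,
    (1, 0, 1, 0, 0, 1, 0): 7,
    (1, 1, 1, 1, 1, 1, 1): 8,
    (1, 1, 1, 1, 0, 1, 1): 9,
}

thresh = cv2.imread("thresh.png", flags=0)  # hypothetical preprocessed image
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])  # step 7

digits = []
for c in contours:
    # Step 8a: bounding box and convenience units proportional to the digit's size
    (x, y, w, h) = cv2.boundingRect(c)
    roi = thresh[y:y + h, x:x + w]
    dH = int(h * 0.15)   # height of the horizontal segments (segment "A")
    dW = int(w * 0.25)   # assumed width of the vertical segments
    dHC = int(h * 0.05)  # assumed half-height of the center segment

    # Step 8b: slice the seven segments as ((x0, y0), (x1, y1)) regions
    segments = [
        ((0, 0), (w, dH)),                       # "A": top
        ((0, 0), (dW, h // 2)),                  # top-left
        ((w - dW, 0), (w, h // 2)),              # top-right
        ((0, h // 2 - dHC), (w, h // 2 + dHC)),  # center
        ((0, h // 2), (dW, h)),                  # bottom-left
        ((w - dW, h // 2), (w, h)),              # bottom-right
        ((0, h - dH), (w, h)),                   # bottom
    ]

    # Step 8c: a segment is "on" when it holds more white than black pixels
    state = [0] * 7
    for i, ((x0, y0), (x1, y1)) in enumerate(segments):
        seg = roi[y0:y1, x0:x1]
        if cv2.countNonZero(seg) > seg.size * 0.5:
            state[i] = 1

    # Step 8d: look the on/off pattern up and collect the prediction
    digits.append(DIGITS_LOOKUP.get(tuple(state), None))

# Step 9 (output): print the predicted digits
print(digits)
```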