Building a Handwritten Multi-Digit Recognition
Building a Handwritten Multi-Digit Recognition
Setting up the Local Environment
Open a command prompt, create a folder named digitapp, and create a virtual environment inside it
mkdir digitapp
cd digitapp
python -m venv digitapp
Activate the virtual environment
digitapp\Scripts\activate
Install the necessary libraries
File: requirements.txt
joblib==1.5.0
keras==3.8.0
matplotlib==3.10.0
numpy==2.0.2
opencv-python
pandas==2.2.2
scikit-learn==1.6.1
seaborn==0.13.2
streamlit==1.45.0
streamlit-drawable-canvas==0.9.3
tensorflow
# Command to execute for installation of the libraries
pip install -r requirements.txt
Create a model
File: trainer.py
#~~~ Working on the data set ~~~
# Import the necessary libraries
import numpy as np

from keras import layers  # For defining the layers of the neural network
from keras import models  # For building the model architecture
from keras.datasets import mnist  # MNIST dataset of handwritten digits
from keras.utils import set_random_seed  # Seeds Python, NumPy and the backend
from keras.utils import to_categorical  # Converts labels to one-hot encoding

# Seed every random number generator involved (not only NumPy's), so that
# weight initialisation and shuffling are repeatable between runs.
set_random_seed(42)

# Split the data into train and test sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add a trailing channel dimension (grayscale), e.g. (60000, 28, 28) becomes
# (60000, 28, 28, 1), without hard-coding the number of samples.
# Then scale the pixel values from [0, 255] to [0, 1].
train_images = np.expand_dims(train_images, axis=-1).astype('float32') / 255
test_images = np.expand_dims(test_images, axis=-1).astype('float32') / 255

# One-hot encode the labels,
# e.g. label 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Build the CNN as a linear stack of layers.
model = models.Sequential([
    # Input: 28x28 pixel images with 1 (grayscale) channel.
    layers.Input(shape=(28, 28, 1)),

    # First convolutional layer:
    # - 32 filters, each 3x3 in size
    # - ReLU activation introduces non-linearity
    layers.Conv2D(32, (3, 3), activation='relu'),
    # Max pooling layer:
    # - Reduces spatial dimensions (downsampling) using 2x2 pooling
    # - Helps reduce computation and overfitting
    layers.MaxPooling2D((2, 2)),
    # Dropout layer:
    # - Randomly sets 20% of inputs to zero during training
    # - Helps prevent overfitting
    layers.Dropout(0.2),

    # Second convolutional layer:
    # - 64 filters, again with 3x3 kernels
    # - ReLU activation
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Third convolutional layer:
    # - 64 filters
    # - No pooling here, so the spatial dimensions stay the same
    #   (except for being reduced by the convolution itself)
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Flatten layer:
    # - Converts the 3D feature maps into a 1D feature vector for the
    #   dense (fully connected) layers
    layers.Flatten(),

    # Hidden dense layer:
    # - 64 units
    # - ReLU activation
    layers.Dense(64, activation='relu'),

    # Output layer:
    # - 10 units for the 10 digit classes (0-9)
    # - Softmax activation to output probabilities for each class
    layers.Dense(10, activation='softmax'),
])

# Show the number of parameters and the output shape of each layer
model.summary()

# Architecture recap:
#   3 Convolutional layers to extract features
#   2 MaxPooling layers to reduce dimensions
#   2 Dropout layers to prevent overfitting
#   1 Flatten layer to convert to vector
#   1 Hidden dense layer
#   1 Output layer with 10 classes (digits 0-9)
# Compile the model:
# - optimizer='rmsprop':
#     RMSprop is used to adjust the learning rate during training
#     (good for RNNs and CNNs)
# - loss='categorical_crossentropy':
#     Appropriate for multi-class classification where labels are
#     one-hot encoded
# - metrics=['accuracy']:
#     Track accuracy as the performance metric during training
#     and validation
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the model:
# - train_images and train_labels: the input data and labels
# - epochs=5: the model will go through the full dataset 5 times
# - batch_size=64: the model will update weights after every 64 samples
# - validation_split=0.1: use 10% of the training data for validation
#   (to monitor performance on unseen data)
training_history = model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=64,
    validation_split=0.1,
)
import matplotlib.pyplot as plt

# --- Loss curves ---
# Set the x-axis and y-axis labels
plt.xlabel('Epoch Number')  # X-axis represents epochs
plt.ylabel('Loss')  # Y-axis represents loss values
# Plot the training loss
plt.plot(training_history.history['loss'], label='Training Loss')
# Plot the validation loss
plt.plot(training_history.history['val_loss'], label='Validation Loss')
# Add a legend to label the two curves
plt.legend()
# Display the plot
plt.show()

# --- Accuracy curves ---
plt.xlabel('Epoch Number')  # Label for x-axis (number of epochs)
plt.ylabel('Accuracy')  # Label for y-axis (accuracy values)
# Plot training accuracy
plt.plot(training_history.history['accuracy'], label='Training Accuracy')
# Plot validation accuracy
plt.plot(training_history.history['val_accuracy'], label='Validation Accuracy')
# Add a legend to differentiate between the two curves
plt.legend()
# Display the plot
plt.show()
# Evaluate the model on the test data.
# This computes the loss and accuracy of the model on the test set.
test_loss, test_acc = model.evaluate(test_images, test_labels)

# Print the accuracy on the test set
print(f"Test Accuracy: {test_acc}")

# Save the trained model to a file (mnist.h5).
# This allows you to reload and use the model later without retraining;
# app.py loads the model from this exact file name.
model.save('mnist.h5')
Execute trainer.py to train and generate the model (mnist.h5)
python trainer.py
Build the Web Application
File: app.py
#~~~ Importing the libraries ~~~
# Import the warnings module to filter and suppress warning messages
import warnings

# Import OpenCV for image processing tasks
import cv2
# Import NumPy for numerical operations, such as reshaping and padding arrays
import numpy as np
# Import Streamlit for creating the web application UI
import streamlit as st
# Import the Keras function to load a pre-trained model
from keras.models import load_model
# Import the Streamlit drawable canvas component for drawing on the web page
from streamlit_drawable_canvas import st_canvas

# Ignore any warnings that might clutter the output
warnings.filterwarnings('ignore')

# Global list collecting every digit predicted by predict()
dgrs = []
# Initialize a global string to store the prediction result
res = " "
#~~~ Working on the digits drawn ~~~
@st.cache_resource
def _load_mnist_model():
    """Load the trained MNIST model from 'mnist.h5' once and reuse it."""
    return load_model('mnist.h5')


def _prepare_digit(binary, box):
    """Crop one digit from the binary image and shape it like a model input.

    The crop is resized to 18x18 and padded with 5 black pixels on each
    side, giving the 28x28 image the model expects, then scaled to [0, 1].
    """
    x, y, w, h = box
    cropped = binary[y:y + h, x:x + w]
    resized = cv2.resize(cropped, (18, 18))
    padded = np.pad(resized, ((5, 5), (5, 5)), "constant", constant_values=0)
    return padded.reshape(28, 28, 1).astype("float32") / 255.0


def predict():
    """Recognise the digits drawn on the canvas image ('./img.jpg').

    The image is converted to a binary image, each external contour is
    treated as one digit, and every digit is classified by the MNIST model.
    Digits are processed from left to right so the result reads in the
    order it was written.

    Side effects:
        Appends each predicted digit to the global list ``dgrs`` and
        appends " <digit>" to the global string ``res``.

    Raises:
        FileNotFoundError: if the canvas image cannot be read.
    """
    global res  # Declared global so it can be modified inside this function

    # Load the pre-trained MNIST model (expects input images of size 28x28)
    model = _load_mnist_model()

    # Path to the image saved from the canvas
    image_folder = "./"
    filename = 'img.jpg'
    image_path = image_folder + filename

    # Read the image in color format using OpenCV; imread returns None
    # instead of raising when the file is missing or unreadable.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f"Could not read the canvas image: {image_path}")

    # Convert to grayscale to simplify the image data (removes color channels)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply Gaussian blur to smooth the image and reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Apply adaptive thresholding to get a binary (black & white) image;
    # THRESH_BINARY_INV inverts the output so the digits become white.
    th = cv2.adaptiveThreshold(
        blurred,                         # Source image
        255,                             # Maximum value assigned to pixels
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # Weighted sum of neighborhood values
        cv2.THRESH_BINARY_INV,           # Invert the output
        11, 2,                           # Block size and constant C
    )

    # Find the external contours (one per drawn stroke group / digit)
    contours = cv2.findContours(
        th,                       # Binary image
        cv2.RETR_EXTERNAL,        # Only retrieve external contours
        cv2.CHAIN_APPROX_SIMPLE,  # Compress straight segments
    )[0]                          # Only the contours are needed

    # Contours come back in no particular reading order, so sort their
    # bounding boxes by x to read a multi-digit number left to right.
    boxes = sorted(
        (cv2.boundingRect(cnt) for cnt in contours),
        key=lambda box: box[0],
    )
    if not boxes:
        return  # Nothing was drawn; leave dgrs and res untouched

    # Classify all digits with a single model call
    batch = np.stack([_prepare_digit(th, box) for box in boxes])
    probabilities = model.predict(batch, verbose=0)

    # Font settings for overlaying the prediction on the image
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    color = (255, 255, 255)  # White
    thickness = 1

    for (x, y, w, h), pred in zip(boxes, probabilities):
        # Digit with the highest probability (model's final prediction)
        final_pred = int(np.argmax(pred))
        dgrs.append(final_pred)
        res = res + " " + str(final_pred)

        # Text showing the prediction and its confidence percentage
        data = str(final_pred) + ' ' + str(int(pred.max() * 100)) + '%'

        # Annotate the in-memory image: blue bounding box plus the label at
        # the top-left corner of the box.
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 1)
        cv2.putText(image, data, (x, y), font, font_scale, color, thickness)
#~~~ UI interface ~~~
# Setting up the UI interface
# Set the app title
st.title("Drawable Canvas")

# Add a description below the title
st.markdown("""
Draw digits on the canvas, get the image data back
into Python!
""")

# Create a canvas where users can draw digits
canvas_result = st_canvas(
    stroke_width=10,     # Thickness of the brush
    stroke_color='red',  # Color of the brush
    height=150,          # Height of the canvas
)

# Check if the user has drawn something
if canvas_result.image_data is not None:
    # Save the drawn image to a file for processing by predict()
    cv2.imwrite("img.jpg", canvas_result.image_data)

# Create a "Predict" button
if st.button("Predict"):
    predict()  # Call the predict function
    st.write('The predicted digit:', res)  # Display the result on the app
Run app.py using streamlit
streamlit run app.py
Open the Streamlit app URL printed in the terminal (by default http://localhost:8501) in your browser


Comments
Post a Comment