Classification of Multiple Images

Author

Venkata Nekkanti

Published

February 19, 2024

#load the libraries

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models
type(datasets.cifar10.load_data())
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 4s 0us/step
tuple
(X_train, y_train), (X_test, y_test) = datasets.cifar10.load_data() # unpack the (train, test) tuples
type(X_train),type(X_test)
(numpy.ndarray, numpy.ndarray)
X_train.shape,y_train.shape,X_test.shape,y_test.shape #understanding the dimensions
((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))
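
So there are 50,000 training images and 10,000 test images, each 32x32 pixels with 3 color channels, and the labels come back as column vectors holding one integer class per image.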
y_train[:5] # y_train is a 2-dimensional array, but a 1-dimensional array of labels suffices
array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)
y_train = y_train.reshape(-1,) #converting the 2D to 1D array.
y_train[:5]
array([6, 9, 9, 4, 1], dtype=uint8)
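
(y_train.ravel() would have been an equivalent way to flatten the column vector.)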
classes = ["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]
classes[9]
'truck'
# Function to display a specific image with its class name, given an index into the dataset.
def plot_sample(X,y, index):
  plt.figure(figsize=(15,2))
  plt.imshow(X[index])
  plt.xlabel(classes[y[index]])
# Let's now call the function we just defined.
plot_sample(X_train, y_train, 5)

plot_sample(X_train, y_train, 3)

# Normalization of Pixel Values

X_train = X_train/255
X_test = X_test/255

# Images are typically represented as arrays of pixel values ranging from 0 to 255 (for 8-bit images).
# Dividing by 255 scales these values to the range 0 to 1.
# Keeping the inputs on this small, consistent scale helps gradient-based training converge.
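# Quick sanity check (a sketch, not in the original run): after dividing by 255 the
# pixel values should lie in [0, 1], and the integer pixels are promoted to floats.
print(X_train.dtype, X_train.min(), X_train.max())  # expected: float64 0.0 1.0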
# Let's now try to build a simple artificial neural network for image classification.
ic = models.Sequential([
        layers.Flatten(input_shape=(32,32,3)), # input layer: flattens each 32x32x3 image into a 3072-value vector
        layers.Dense(3000, activation='relu'), # hidden layer with 3000 neurons
        layers.Dense(1000, activation='relu'), # hidden layer with 1000 neurons
        layers.Dense(10, activation='softmax') # output layer: one probability per class
    ])

ic.compile(optimizer='SGD',
              loss='sparse_categorical_crossentropy', # if y were one-hot encoded we would use categorical_crossentropy; since y is an integer class index like 7, we use sparse_categorical_crossentropy
              metrics=['accuracy'])
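# For contrast (a sketch, not part of this run): if the labels were one-hot encoded,
# the matching loss would be plain categorical_crossentropy. to_categorical is the
# standard Keras helper for that conversion.
from tensorflow.keras.utils import to_categorical
y_train_onehot = to_categorical(y_train, num_classes=10)
print(y_train[0], '->', y_train_onehot[0])  # expected: 6 -> [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
# ic.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])
# ic.fit(X_train, y_train_onehot, epochs=5)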

ic.fit(X_train, y_train, epochs=5) # epochs sets how many full passes the model makes over the training data
/usr/local/lib/python3.10/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
Epoch 1/5
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.3061 - loss: 1.9319
Epoch 2/5
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 9s 4ms/step - accuracy: 0.4176 - loss: 1.6382
Epoch 3/5
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 10s 4ms/step - accuracy: 0.4573 - loss: 1.5472
Epoch 4/5
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.4775 - loss: 1.4917
Epoch 5/5
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 4s 3ms/step - accuracy: 0.4984 - loss: 1.4349
<keras.src.callbacks.history.History at 0x78b83f01ff40>

The training accuracy is around 49%, which is not very good.
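
As an aside, the UserWarning printed during training comes from passing input_shape to the first layer. An equivalent way to define the same network, declaring the input explicitly as the warning suggests (a sketch with an illustrative name, not retrained here):

ic_alt = models.Sequential([
        layers.Input(shape=(32,32,3)),         # explicit input spec, which silences the warning
        layers.Flatten(),
        layers.Dense(3000, activation='relu'),
        layers.Dense(1000, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])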

from sklearn.metrics import confusion_matrix , classification_report
import numpy as np
y_pred = ic.predict(X_test)
y_pred_classes = [np.argmax(element) for element in y_pred]

print("Classification Report: \n", classification_report(y_test, y_pred_classes))
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step
Classification Report: 
               precision    recall  f1-score   support

           0       0.46      0.60      0.52      1000
           1       0.34      0.82      0.48      1000
           2       0.49      0.15      0.23      1000
           3       0.41      0.21      0.28      1000
           4       0.54      0.27      0.36      1000
           5       0.40      0.41      0.41      1000
           6       0.50      0.56      0.53      1000
           7       0.55      0.51      0.53      1000
           8       0.57      0.62      0.59      1000
           9       0.49      0.37      0.42      1000

    accuracy                           0.45     10000
   macro avg       0.47      0.45      0.43     10000
weighted avg       0.47      0.45      0.43     10000
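
Since confusion_matrix is imported above but never used, here is a small sketch (matplotlib only) of how the per-class errors could be visualized:

cm = confusion_matrix(y_test.reshape(-1,), y_pred_classes)  # rows = actual class, columns = predicted class
plt.figure(figsize=(7, 6))
plt.imshow(cm, cmap='Blues')
plt.colorbar()
plt.xticks(range(10), classes, rotation=45)
plt.yticks(range(10), classes)
plt.xlabel('Predicted')
plt.ylabel('Actual')
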
# Let's now build a convolutional neural network (CNN) and improve the model with better layering.
cnn = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
/usr/local/lib/python3.10/dist-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
cnn.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                      │ (None, 30, 30, 32)          │             896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d (MaxPooling2D)         │ (None, 15, 15, 32)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_1 (Conv2D)                    │ (None, 13, 13, 64)          │          18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_1 (MaxPooling2D)       │ (None, 6, 6, 64)            │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_1 (Flatten)                  │ (None, 2304)                │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_3 (Dense)                      │ (None, 64)                  │         147,520 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_4 (Dense)                      │ (None, 10)                  │             650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 167,562 (654.54 KB)
 Trainable params: 167,562 (654.54 KB)
 Non-trainable params: 0 (0.00 B)
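
The parameter counts in the summary can be checked by hand: a Conv2D layer has (kernel_height * kernel_width * input_channels + 1) * filters parameters and a Dense layer has (inputs + 1) * units, where the +1 is the bias. Each 3x3 convolution with default 'valid' padding also shrinks the spatial size by 2, and each 2x2 max pool halves it, which is why the feature maps go 32 -> 30 -> 15 -> 13 -> 6.

print((3*3*3 + 1) * 32)     # 896     conv2d
print((3*3*32 + 1) * 64)    # 18496   conv2d_1
print((6*6*64 + 1) * 64)    # 147520  dense after Flatten (6*6*64 = 2304 inputs)
print((64 + 1) * 10)        # 650     output layer
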
cnn.compile(optimizer='adam', # Adam maintains per-parameter adaptive learning rates and often converges faster than plain SGD
              loss='sparse_categorical_crossentropy', # labels are integer class indices, so we again use the sparse loss
              metrics=['accuracy'])
cnn.fit(X_train, y_train, epochs=10)
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 9s 3ms/step - accuracy: 0.3583 - loss: 1.7562
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 6s 2ms/step - accuracy: 0.5723 - loss: 1.2178
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.6341 - loss: 1.0530
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.6666 - loss: 0.9666
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.6907 - loss: 0.8957
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.7054 - loss: 0.8477
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - accuracy: 0.7236 - loss: 0.8056
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - accuracy: 0.7344 - loss: 0.7646
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 4s 3ms/step - accuracy: 0.7430 - loss: 0.7319
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.7567 - loss: 0.7015
<keras.src.callbacks.history.History at 0x78b750db29b0>
cnn.evaluate(X_test,y_test)
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.7005 - loss: 0.8834
[0.8994408249855042, 0.6966999769210815]

We now land at a noticeably better accuracy (about 70% on the test set) than with the plain dense network. The 32x32 input images are small and low-resolution, so near-perfect accuracy is not realistic; I'd stick with this as a reasonable result.

# Time for some predictions...
y_pred = cnn.predict(X_test)
y_pred[:5]
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step
array([[8.5942736e-03, 1.5321394e-04, 2.5765553e-02, 6.4822674e-01,
        2.7658421e-04, 2.6662534e-02, 5.5865822e-03, 5.3579576e-04,
        2.8410617e-01, 9.2587135e-05],
       [5.3280666e-02, 2.5588342e-03, 2.3992468e-05, 9.5548457e-06,
        3.4179793e-06, 1.8470092e-07, 2.5370001e-08, 7.5638049e-08,
        9.4372505e-01, 3.9814957e-04],
       [1.2224587e-01, 5.9868312e-01, 4.3528755e-03, 1.0073372e-02,
        8.7910118e-03, 1.7486976e-03, 1.3118266e-03, 1.0229350e-02,
        1.8488812e-01, 5.7675824e-02],
       [4.3167397e-01, 1.8747890e-02, 1.9484865e-02, 1.2260282e-02,
        5.6867022e-03, 1.8741388e-04, 7.9920760e-04, 1.5470705e-03,
        5.0918615e-01, 4.2647251e-04],
       [1.0455847e-05, 4.5812911e-05, 3.4536619e-02, 1.6169388e-02,
        8.4160775e-01, 8.5049681e-03, 9.8926507e-02, 1.7184652e-04,
        2.6100173e-05, 6.0151456e-07]], dtype=float32)
y_classes = [np.argmax(element) for element in y_pred]
y_classes[:5]
[3, 8, 1, 8, 4]
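
The list comprehension works, but np.argmax can do the same in one vectorized call:

np.argmax(y_pred, axis=1)[:5]  # should match y_classes[:5] above: [3, 8, 1, 8, 4]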
y_test = y_test.reshape(-1,) #converting the 2D to 1D array.
y_test[:5]
array([3, 8, 8, 0, 6], dtype=uint8)

I will now compare some actual and predicted values to understand the model's behaviour better.

plot_sample(X_test, y_test,16)

classes[y_classes[16]]
'dog'
plot_sample(X_test, y_test,100)

classes[y_classes[100]]
'deer'
plot_sample(X_test, y_test,75)

classes[y_classes[75]]
'bird'
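
For a fuller comparison with the dense network's report above, the same classification_report can be generated from the CNN predictions already computed (a sketch; output omitted here):

print("CNN Classification Report: \n", classification_report(y_test, y_classes))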