1. Import the libraries, load dataset, print shape of data, visualize the images in dataset. (5 )

%matplotlib inline
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy import ndimage
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, LabelBinarizer
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization

# Mount Google Drive inside the Colab VM so the dataset files can be read
# through the local filesystem.
from google.colab import drive

drive_mount_point = '/content/sample_data/aiml'
drive.mount(drive_mount_point)

Drive already mounted at /content/sample_data/aiml; to attempt to forcibly remount, call drive.mount("/content/sample_data/aiml", force_remount=True).

# Read the species label of every image from the mounted Drive folder.
labels_csv_path = '/content/sample_data/aiml/My Drive/AIML/Labels.csv'
labels = pd.read_csv(labels_csv_path)
# Number of non-null entries in each column.
labels.count()

Label    4750
dtype: int64

# Number of images per plant species. Labels.csv has a single column, so
# groupby('Label').count() has no remaining column to count and yields an
# empty frame; size() reports the number of rows in each group instead.
labels.groupby('Label').size()

# Distinct species names, in the order they first appear in the label column.
label_unique = labels['Label'].unique()
label_unique

array(['Small-flowered Cranesbill', 'Fat Hen', 'Shepherds Purse',
       'Common wheat', 'Common Chickweed', 'Charlock', 'Cleavers',
       'Scentless Mayweed', 'Sugar beet', 'Maize', 'Black-grass',
       'Loose Silky-bent'], dtype=object)

# One-column lookup table of the species names.
# NOTE: rows follow the order of label_unique (first appearance in the CSV),
# which is not necessarily the sorted order an sklearn encoder assigns to classes.
label_df = pd.DataFrame({'plant_label': label_unique})
label_df

# Load the image array saved in NumPy format from the mounted Drive folder.
images_npy_path = '/content/sample_data/aiml/My Drive/AIML/images.npy'
images = np.load(images_npy_path)
images.shape

(4750, 128, 128, 3)

# Visualize images in the dataset: display one raw image as a sanity check.
raw_sample = images[1200]
plt.imshow(raw_sample)

<matplotlib.image.AxesImage at 0x7f0cc0a769e8>

The dataset contains 4750 RGB images, each 128x128 pixels (array shape: 4750 x 128 x 128 x 3).

2. Data Pre-processing: (15 ) a. Normalization. b. Gaussian Blurring. c. Visualize data after pre-processing.

# Normalizing: scale pixel values by 1/255 using true (floating-point) division.
images_normalized = np.true_divide(images, 255)

#Gaussian blurring
# The array is (samples, height, width, channels). A scalar sigma would be
# applied along every axis, smearing neighbouring images into each other along
# axis 0 and mixing the RGB channels along axis 3. Giving sigma per axis, with
# 0 for the axes that must not be filtered, blurs each image only spatially.
blurred_images = ndimage.gaussian_filter(images_normalized, sigma=(0, 3, 3, 0))
blurred_images.shape

(4750, 128, 128, 3)

# Visualizing data after preprocessing: same sample index as the raw preview.
preprocessed_sample = blurred_images[1200]
plt.imshow(preprocessed_sample)

<matplotlib.image.AxesImage at 0x7f0cc0a5f438>

3. Make data compatible: (10 ) a. Convert labels to one-hot-vectors. b. Print the label for y_train[0]. c. Split the dataset into training, testing, and validation set. (Hint: First split images and labels into training and testing set with test_size = 0.3. Then further split test data into test and validation set with test_size = 0.5) d. Check the shape of data, Reshape data into shapes compatible with Keras models if it’s not already. If it’s already in the compatible shape, then comment in the notebook that it’s already in compatible shape.

# One-hot encode the species names: fit() learns the set of classes and
# transform() emits one indicator column per class.
labelEncode = LabelBinarizer()
labelEncode.fit(labels)
labelBinEncoded = labelEncode.transform(labels)

labelBinEncoded

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

# Step 1: keep 70% for training and hold out the remaining 30%.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    blurred_images,
    labelBinEncoded,
    test_size=0.3,
    random_state=0,
)
# Step 2: cut the hold-out portion in half to obtain the test and validation sets.
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout,
    y_holdout,
    test_size=0.5,
    random_state=0,
)
# Drop the intermediate references so the hold-out arrays are not kept alive.
del x_holdout, y_holdout

# One-hot label vector of the first training sample.
y_train[0, :]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Shape of the training image array.
np.shape(x_train)

(3325, 128, 128, 3)

# Shape of the training label matrix.
np.shape(y_train)

(3325, 12)

# Shape of the test image array.
np.shape(x_test)

(712, 128, 128, 3)

# Shape of the test label matrix.
np.shape(y_test)

(712, 12)

# Shape of the validation image array.
np.shape(x_val)

(713, 128, 128, 3)

# Shape of the validation label matrix.
np.shape(y_val)

(713, 12)

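The data is already in a Keras-compatible shape: the image arrays are (samples, 128, 128, 3), matching the model's input shape, and the label arrays are (samples, 12), one column per class. No reshaping is needed.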

4. Building CNN: (15 ) a. Define layers. b. Set optimizer and loss function. (Use Adam optimizer and categorical crossentropy.)

# Build the CNN: three convolutional stages followed by two dense blocks and a
# 12-way softmax classifier.
model = Sequential()

# Each stage is conv -> batch-norm -> conv -> 2x2 max-pool -> batch-norm -> dropout,
# with the filter count doubling from one stage to the next.
for stage_index, n_filters in enumerate((64, 128, 256)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (128, 128, 3)} if stage_index == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization(axis=3))
    model.add(Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(BatchNormalization(axis=3))
    model.add(Dropout(0.1))

# Flatten the feature maps into a vector for the fully connected head.
model.add(Flatten())

# Two identical dense blocks: dense -> batch-norm -> dropout.
for dense_block_index in range(2):
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

# Output layer: one probability per plant species.
model.add(Dense(12, activation='softmax'))

# Adam optimizer with categorical cross-entropy, tracking accuracy.
opt = keras.optimizers.Adam(learning_rate=0.0001)
# compile model
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 124, 124, 64)      4864      
_________________________________________________________________
batch_normalization (BatchNo (None, 124, 124, 64)      256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 120, 120, 64)      102464    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 60, 60, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 60, 60, 64)        256       
_________________________________________________________________
dropout (Dropout)            (None, 60, 60, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 128)       204928    
_________________________________________________________________
batch_normalization_2 (Batch (None, 56, 56, 128)       512       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 52, 52, 128)       409728    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 26, 26, 128)       0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 26, 26, 128)       512       
_________________________________________________________________
dropout_1 (Dropout)          (None, 26, 26, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 22, 22, 256)       819456    
_________________________________________________________________
batch_normalization_4 (Batch (None, 22, 22, 256)       1024      
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 18, 18, 256)       1638656   
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 9, 9, 256)         0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 9, 9, 256)         1024      
_________________________________________________________________
dropout_2 (Dropout)          (None, 9, 9, 256)         0         
_________________________________________________________________
flatten (Flatten)            (None, 20736)             0         
_________________________________________________________________
dense (Dense)                (None, 256)               5308672   
_________________________________________________________________
batch_normalization_6 (Batch (None, 256)               1024      
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
batch_normalization_7 (Batch (None, 256)               1024      
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 12)                3084      
=================================================================
Total params: 8,563,276
Trainable params: 8,560,460
Non-trainable params: 2,816
_________________________________________________________________

5. Fit and evaluate model and print confusion matrix. (10 )

# Train for 20 epochs in mini-batches of 50 with shuffling enabled, reporting
# loss and accuracy on the validation split after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=50,
    shuffle=True,
    verbose=1,
)

Epoch 1/20
 2/67 [..............................] - ETA: 2s - loss: 3.9820 - accuracy: 0.0900WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0326s vs `on_train_batch_end` time: 0.0547s). Check your callbacks.
67/67 [==============================] - 7s 109ms/step - loss: 2.2684 - accuracy: 0.3537 - val_loss: 2.8312 - val_accuracy: 0.1262
Epoch 2/20
67/67 [==============================] - 6s 95ms/step - loss: 1.4638 - accuracy: 0.5383 - val_loss: 4.1424 - val_accuracy: 0.1262
Epoch 3/20
67/67 [==============================] - 6s 95ms/step - loss: 1.1129 - accuracy: 0.6421 - val_loss: 5.5497 - val_accuracy: 0.1262
Epoch 4/20
67/67 [==============================] - 6s 96ms/step - loss: 0.8328 - accuracy: 0.7239 - val_loss: 5.0337 - val_accuracy: 0.1262
Epoch 5/20
67/67 [==============================] - 6s 95ms/step - loss: 0.6659 - accuracy: 0.7814 - val_loss: 6.9377 - val_accuracy: 0.1178
Epoch 6/20
67/67 [==============================] - 6s 95ms/step - loss: 0.4624 - accuracy: 0.8538 - val_loss: 6.5168 - val_accuracy: 0.1010
Epoch 7/20
67/67 [==============================] - 6s 95ms/step - loss: 0.3550 - accuracy: 0.8884 - val_loss: 7.1416 - val_accuracy: 0.1024
Epoch 8/20
67/67 [==============================] - 6s 95ms/step - loss: 0.2837 - accuracy: 0.9152 - val_loss: 4.2095 - val_accuracy: 0.1950
Epoch 9/20
67/67 [==============================] - 6s 94ms/step - loss: 0.2330 - accuracy: 0.9347 - val_loss: 1.7152 - val_accuracy: 0.5442
Epoch 10/20
67/67 [==============================] - 6s 95ms/step - loss: 0.1881 - accuracy: 0.9498 - val_loss: 0.5810 - val_accuracy: 0.8612
Epoch 11/20
67/67 [==============================] - 6s 95ms/step - loss: 0.1402 - accuracy: 0.9624 - val_loss: 0.2209 - val_accuracy: 0.9383
Epoch 12/20
67/67 [==============================] - 6s 95ms/step - loss: 0.1188 - accuracy: 0.9732 - val_loss: 0.2244 - val_accuracy: 0.9257
Epoch 13/20
67/67 [==============================] - 6s 95ms/step - loss: 0.1044 - accuracy: 0.9762 - val_loss: 0.0663 - val_accuracy: 0.9888
Epoch 14/20
67/67 [==============================] - 6s 95ms/step - loss: 0.0920 - accuracy: 0.9783 - val_loss: 0.0593 - val_accuracy: 0.9916
Epoch 15/20
67/67 [==============================] - 6s 95ms/step - loss: 0.0810 - accuracy: 0.9826 - val_loss: 0.0515 - val_accuracy: 0.9916
Epoch 16/20
67/67 [==============================] - 6s 94ms/step - loss: 0.0611 - accuracy: 0.9907 - val_loss: 0.0315 - val_accuracy: 0.9972
Epoch 17/20
67/67 [==============================] - 6s 95ms/step - loss: 0.0655 - accuracy: 0.9856 - val_loss: 0.0405 - val_accuracy: 0.9944
Epoch 18/20
67/67 [==============================] - 6s 94ms/step - loss: 0.0498 - accuracy: 0.9937 - val_loss: 0.0418 - val_accuracy: 0.9930
Epoch 19/20
67/67 [==============================] - 6s 95ms/step - loss: 0.0440 - accuracy: 0.9919 - val_loss: 0.0413 - val_accuracy: 0.9930
Epoch 20/20
67/67 [==============================] - 6s 95ms/step - loss: 0.0465 - accuracy: 0.9898 - val_loss: 0.0453 - val_accuracy: 0.9930

<tensorflow.python.keras.callbacks.History at 0x7f0c597dda90>

# Score trained model on the held-out test split.
# The first entry is printed as the loss, the second as the accuracy metric.
scores = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

23/23 [==============================] - 0s 22ms/step - loss: 0.0347 - accuracy: 0.9930
Test loss: 0.03470199182629585
Test accuracy: 0.992977499961853

# Collapse the softmax outputs and the one-hot targets to class indices so the
# two can be compared directly.
y_pred = model.predict(x_test).argmax(axis=-1)
y_test_class = y_test.argmax(axis=1)
y_pred.shape

(712,)

# Confusion matrix on the test split: rows are true classes, columns are predictions.
confusionMTX = confusion_matrix(y_true=y_test_class, y_pred=y_pred)
confusionMTX

array([[35,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0],
       [ 0, 50,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 51,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 95,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 35,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 85,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0, 96,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 26,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 77,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0, 39,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 60,  0],
       [ 0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0, 58]])

6. Visualize predictions for x_test[2], x_test[3], x_test[33], x_test[36], x_test[59]. (5 )

#x_test[2]
# Index of the test sample inspected in this cell.
sample_idx = 2
predicted_id = y_pred[sample_idx]
# y_pred holds column indices of the one-hot matrix produced by labelEncode, so
# the class name has to come from labelEncode.classes_ (sorted label order).
# label_df lists the labels in order of first appearance in Labels.csv, which
# is a different order and gives the wrong name for the same index.
predicted_name = labelEncode.classes_[predicted_id]
print("ID of the plant : " , predicted_id)
print("Name of the plant :" + predicted_name)
print("Sample image and Actual Normalized, Guassian Blur Image of the plant")
fig, ax = plt.subplots(1,2)
# Left panel: the first raw dataset image that carries the predicted label.
# (The original `exit` was a bare name, not a loop exit, so the loop never stopped.)
for i, label in enumerate(labels["Label"]):
    if label == predicted_name:
        ax[0].imshow(images[i])
        break
# Right panel: the pre-processed test image that was fed to the model.
ax[1].imshow(x_test[sample_idx])

ID of the plant :  7
Name of the plant :Scentless Mayweed
Sample image and Actual Normalized, Guassian Blur Image of the plant

#x_test[3]
# Index of the test sample inspected in this cell.
sample_idx = 3
predicted_id = y_pred[sample_idx]
# y_pred holds column indices of the one-hot matrix produced by labelEncode, so
# the class name has to come from labelEncode.classes_ (sorted label order).
# label_df lists the labels in order of first appearance in Labels.csv, which
# is a different order and gives the wrong name for the same index.
predicted_name = labelEncode.classes_[predicted_id]
print("ID of the plant : " , predicted_id)
print("Name of the plant :" + predicted_name)
print("Sample image and Actual Normalized, Guassian Blur Image of the plant")
fig, ax = plt.subplots(1,2)
# Left panel: the first raw dataset image that carries the predicted label.
# (The original `exit` was a bare name, not a loop exit, so the loop never stopped.)
for i, label in enumerate(labels["Label"]):
    if label == predicted_name:
        ax[0].imshow(images[i])
        break
# Right panel: the pre-processed test image that was fed to the model.
ax[1].imshow(x_test[sample_idx])

ID of the plant :  10
Name of the plant :Black-grass
Sample image and Actual Normalized, Guassian Blur Image of the plant

#x_test[33]
# Index of the test sample inspected in this cell.
sample_idx = 33
predicted_id = y_pred[sample_idx]
# y_pred holds column indices of the one-hot matrix produced by labelEncode, so
# the class name has to come from labelEncode.classes_ (sorted label order).
# label_df lists the labels in order of first appearance in Labels.csv, which
# is a different order and gives the wrong name for the same index.
predicted_name = labelEncode.classes_[predicted_id]
print("ID of the plant : " , predicted_id)
print("Name of the plant :" + predicted_name)
print("Sample image and Actual Normalized, Guassian Blur Image of the plant")
fig, ax = plt.subplots(1,2)
# Left panel: the first raw dataset image that carries the predicted label.
# (The original `exit` was a bare name, not a loop exit, so the loop never stopped.)
for i, label in enumerate(labels["Label"]):
    if label == predicted_name:
        ax[0].imshow(images[i])
        break
# Right panel: the pre-processed test image that was fed to the model.
ax[1].imshow(x_test[sample_idx])

ID of the plant :  3
Name of the plant :Common wheat
Sample image and Actual Normalized, Guassian Blur Image of the plant

#x_test[36]
# Index of the test sample inspected in this cell.
sample_idx = 36
predicted_id = y_pred[sample_idx]
# y_pred holds column indices of the one-hot matrix produced by labelEncode, so
# the class name has to come from labelEncode.classes_ (sorted label order).
# label_df lists the labels in order of first appearance in Labels.csv, which
# is a different order and gives the wrong name for the same index.
predicted_name = labelEncode.classes_[predicted_id]
print("ID of the plant : " , predicted_id)
print("Name of the plant :" + predicted_name)
print("Sample image and Actual Normalized, Guassian Blur Image of the plant")
fig, ax = plt.subplots(1,2)
# Left panel: the first raw dataset image that carries the predicted label.
# (The original `exit` was a bare name, not a loop exit, so the loop never stopped.)
for i, label in enumerate(labels["Label"]):
    if label == predicted_name:
        ax[0].imshow(images[i])
        break
# Right panel: the pre-processed test image that was fed to the model.
ax[1].imshow(x_test[sample_idx])

ID of the plant :  5
Name of the plant :Charlock
Sample image and Actual Normalized, Guassian Blur Image of the plant

#x_test[59]
# Index of the test sample inspected in this cell.
sample_idx = 59
predicted_id = y_pred[sample_idx]
# y_pred holds column indices of the one-hot matrix produced by labelEncode, so
# the class name has to come from labelEncode.classes_ (sorted label order).
# label_df lists the labels in order of first appearance in Labels.csv, which
# is a different order and gives the wrong name for the same index.
predicted_name = labelEncode.classes_[predicted_id]
print("ID of the plant : " , predicted_id)
print("Name of the plant :" + predicted_name)
print("Sample image and Actual Normalized, Guassian Blur Image of the plant")
fig, ax = plt.subplots(1,2)
# Left panel: the first raw dataset image that carries the predicted label.
# (The original `exit` was a bare name, not a loop exit, so the loop never stopped.)
for i, label in enumerate(labels["Label"]):
    if label == predicted_name:
        ax[0].imshow(images[i])
        break
# Right panel: the pre-processed test image that was fed to the model.
ax[1].imshow(x_test[sample_idx])

ID of the plant :  11
Name of the plant :Loose Silky-bent
Sample image and Actual Normalized, Guassian Blur Image of the plant

	plant_label
0	Small-flowered Cranesbill
1	Fat Hen
2	Shepherds Purse
3	Common wheat
4	Common Chickweed
5	Charlock
6	Cleavers
7	Scentless Mayweed
8	Sugar beet
9	Maize
10	Black-grass
11	Loose Silky-bent