1. Import the libraries, load dataset, print shape of data, visualize the images in dataset. (5 )
%matplotlib inline
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy import ndimage
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, LabelBinarizer
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from google.colab import drive
# Mount Google Drive inside the Colab VM so the dataset files can be read.
drive.mount('/content/sample_data/aiml')

# One plant-species label per row; rows are assumed to be aligned by position
# with the image stack loaded further down (the rest of the notebook pairs
# labels[i] with images[i]).
labels = pd.read_csv('/content/sample_data/aiml/My Drive/AIML/Labels.csv')
labels.count()
labels.groupby('Label').count()

# Series.unique() returns the classes in order of first appearance, whereas
# scikit-learn's label encoders number the classes in sorted order. Sorting
# here makes row i of label_df name the class whose encoded id is i, so
# looking up a predicted class id in this table yields the right plant name.
label_unique = np.sort(labels.Label.unique())
label_unique
label_df = pd.DataFrame(label_unique, columns=['plant_label'])
label_df

# Image stack stored as a single NumPy array.
images = np.load('/content/sample_data/aiml/My Drive/AIML/images.npy')
images.shape

#Visualize images in the dataset
plt.imshow(images[1200])
The dataset contains 4750 RGB images, each 128x128 pixels (array shape: 4750 x 128 x 128 x 3).
2. Data Pre-processing: (15 ) a. Normalization. b. Gaussian Blurring. c. Visualize data after pre-processing.
#Normalizing
# Scale pixel intensities by 1/255 (assumes 8-bit pixel values, giving [0, 1]).
images_normalized = images/255

#Gaussian blurring
# The stack is 4-D: (sample, height, width, channel). A scalar sigma would be
# applied along every axis, smearing neighbouring images into each other and
# mixing the colour channels. A per-axis sigma with 0 on the sample and
# channel axes restricts the blur to the two spatial axes of each image.
blurred_images = ndimage.gaussian_filter(images_normalized, sigma=(0, 3, 3, 0))
blurred_images.shape

#Visualizing data after preprocessing
plt.imshow(blurred_images[1200])
3. Make data compatible: (10 ) a. Convert labels to one-hot-vectors. b. Print the label for y_train[0]. c. Split the dataset into training, testing, and validation set. (Hint: First split images and labels into training and testing set with test_size = 0.3. Then further split test data into test and validation set with test_size = 0.5) d. Check the shape of data, Reshape data into shapes compatible with Keras models if it’s not already. If it’s already in the compatible shape, then comment in the notebook that it’s already in compatible shape.
# One-hot encode the class names. LabelBinarizer expects a 1-D sequence of
# labels, so pass the 'Label' column explicitly rather than the whole
# DataFrame (which only works while the CSV has exactly one column).
labelEncode = LabelBinarizer()
labelBinEncoded = labelEncode.fit_transform(labels['Label'])
labelBinEncoded

# 70 % train / 30 % hold-out, then split the hold-out half-and-half into
# test and validation sets. Fixed random_state keeps the splits reproducible.
x_train, x_test, y_train, y_test = train_test_split(blurred_images, labelBinEncoded, test_size = 0.3, random_state=0)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size= 0.5, random_state=0)

# One-hot label of the first training sample.
y_train[0]

# Shapes of every split (notebook display expressions).
x_train.shape
y_train.shape
x_test.shape
y_test.shape
x_val.shape
y_val.shape
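The image arrays are already 4-D (samples, height, width, channels) and the labels are one-hot vectors, so the training, validation and test data are already in a Keras-compatible shape and no reshaping is needed.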
4. Building CNN: (15 ) a. Define layers. b. Set optimizer and loss function. (Use Adam optimizer and categorical crossentropy.)
# Convolutional feature extractor: three stages with 64, 128 and 256 filters.
# Each stage is Conv -> BatchNorm -> Conv -> MaxPool -> BatchNorm -> Dropout.
conv_stack = []
for n_filters in (64, 128, 256):
    # Only the very first layer of the network declares the input shape.
    entry_kwargs = {} if conv_stack else {'input_shape': (128, 128, 3)}
    conv_stack += [
        Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu', **entry_kwargs),
        BatchNormalization(axis=3),
        Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D((2, 2)),
        BatchNormalization(axis=3),
        Dropout(0.1),
    ]

# Classifier head: two regularised 256-unit dense blocks, then a 12-way softmax.
dense_head = [Flatten()]
for _ in range(2):
    dense_head += [
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
    ]
dense_head.append(Dense(12, activation='softmax'))

model = Sequential(conv_stack + dense_head)

# compile model: Adam optimizer with categorical cross-entropy loss
opt = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()
5. Fit and evaluate model and print confusion matrix. (10 )
# Train for 20 epochs, monitoring loss/accuracy on the validation split.
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=50,
    shuffle=True,
    verbose=1,
)

# Score trained model on the held-out test split.
scores = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

# Collapse predicted probabilities and one-hot targets to integer class ids.
class_probabilities = model.predict(x_test)
y_pred = class_probabilities.argmax(axis=-1)
y_test_class = y_test.argmax(axis=1)
y_pred.shape

# Rows are true classes, columns are predicted classes.
confusionMTX = confusion_matrix(y_test_class, y_pred)
confusionMTX
6. Visualize predictions for x_test[2], x_test[3], x_test[33], x_test[36], x_test[59]. (5 )
def _show_prediction(index):
    """Report the predicted class of ``x_test[index]`` and plot it.

    Prints the predicted class id and plant name, then draws two panels:
    on the left, the first raw image in ``images`` whose label equals the
    predicted plant name (a reference sample of that class); on the right,
    the pre-processed test image ``x_test[index]``.

    Args:
        index: Position of the sample within ``x_test`` / ``y_pred``.
    """
    predicted_id = y_pred[index]
    # y_pred holds column indices of the one-hot encoding produced by
    # labelEncode, so labelEncode.classes_ is the authoritative id -> name map.
    predicted_name = labelEncode.classes_[predicted_id]

    print("ID of the plant : " , predicted_id)
    print("Name of the plant :" + predicted_name)
    print("Sample image and Actual Normalized, Guassian Blur Image of the plant")

    fig, ax = plt.subplots(1, 2)
    # Show the first raw image of the predicted class and stop searching;
    # iterating the label column avoids hard-coding the dataset size.
    for position, label in enumerate(labels["Label"]):
        if label == predicted_name:
            ax[0].imshow(images[position])
            break
    ax[1].imshow(x_test[index])
    # Flush the figure now so it appears right after its printed description.
    plt.show()


# x_test[2], x_test[3], x_test[33], x_test[36], x_test[59]
for test_index in (2, 3, 33, 36, 59):
    _show_prediction(test_index)