Where is your CNN model looking? (Grad-CAM)
Use Grad-CAM to visualize where the network is looking, i.e. which pixels in the image contribute most to the prediction being made.
Gradient-weighted Class Activation Mapping (Grad-CAM) :
Grad-CAM is a technique to visually represent where a model is looking and why it has made a certain prediction. It was first presented in the paper "Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization".
from keras import backend as K
import time
import matplotlib.pyplot as plt
import numpy as np
% matplotlib inline
np.random.seed(2017)
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.datasets import cifar10
# Load CIFAR-10 as (features, labels) pairs for the train and test splits.
(train_features, train_labels), (test_features, test_labels) = cifar10.load_data()
# The images are used channels-last throughout this file (see the model's
# input_shape=(32, 32, 3)), so the shape unpacks as (N, rows, cols, channels).
num_train, img_rows, img_cols, img_channels = train_features.shape
num_test = test_features.shape[0]
# Number of distinct label values present in the training labels.
num_classes = len(np.unique(train_labels))
# Human-readable names, indexed by class id.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
# Show one randomly chosen training image per class in a 2x5 grid.
fig = plt.figure(figsize=(8, 3))
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, i + 1, xticks=[], yticks=[])
    # Row positions of every training sample labelled with class i.
    idx = np.where(train_labels == i)[0]
    # Draw a random position among those samples and fetch that image.
    img_num = np.random.randint(idx.shape[0])
    im = train_features[idx[img_num]]
    ax.set_title(class_names[i])
    ax.imshow(im)
plt.show()
# Cast the pixel values to float32 and divide by 255.
train_features = train_features.astype('float32') / 255
test_features = test_features.astype('float32') / 255
# One-hot encode the integer class labels (num_classes columns per label).
train_labels = np_utils.to_categorical(train_labels, num_classes)
test_labels = np_utils.to_categorical(test_labels, num_classes)
from keras.layers import Conv2D,BatchNormalization,MaxPooling2D,Activation,Flatten
# Define the model. Trailing "#n" comments are the author's receptive-field notes.
# `padding` is the Keras 2 name of the argument formerly called `border_mode`.
model = Sequential()
model.add(Conv2D(32, 3, padding='same', name='layer1', input_shape=(32, 32, 3)))  #3
model.add(BatchNormalization(name='BN1'))
model.add(Activation('relu', name='rl1'))

# Conv block 1
model.add(Conv2D(64, 3, name='layer2', padding='same'))  #5
model.add(BatchNormalization(name='BN2'))
model.add(Activation('relu', name='rl2'))

model.add(Conv2D(128, 3, name='layer3'))  #7
model.add(BatchNormalization(name='BN3'))
model.add(Activation('relu', name='rl3'))

# Dropout after conv block 1
model.add(Dropout(0.1, name='drp1'))

# Transition block 1: 1x1 convolution down to 32 channels, then 2x2 max pooling
model.add(Conv2D(32, 1, name='tb1'))
model.add(BatchNormalization(name='tb-BN1'))
model.add(Activation('relu', name='tb-rl1'))
model.add(MaxPooling2D(pool_size=(2, 2), name='mp1'))  #14

# Conv block 2
model.add(Conv2D(64, 3, name='layer4', padding='same'))  #16
model.add(BatchNormalization(name='BN4'))
model.add(Activation('relu', name='rl4'))

model.add(Conv2D(128, 3, name='layer5', padding='same'))  #18
model.add(BatchNormalization(name='BN5'))
model.add(Activation('relu', name='rl5'))

# Dropout after conv block 2
model.add(Dropout(0.1, name='drp2'))

# Transition block 2
model.add(Conv2D(32, 1, name='tb2'))
model.add(BatchNormalization(name='tb-BN2'))
model.add(Activation('relu', name='tb-rl2'))
model.add(MaxPooling2D(pool_size=(2, 2), name='mp2'))  #36 - we have reached the image size here

# Final conv block
model.add(Conv2D(64, 3, name='layer6', padding='same'))  #38
model.add(BatchNormalization(name='BN6'))
model.add(Activation('relu', name='rl6'))

model.add(Conv2D(128, 3, name='layer7', padding='same'))  #40
model.add(BatchNormalization(name='BN7'))
model.add(Activation('relu', name='rl7'))

# Dropout after final conv block
model.add(Dropout(0.1, name='d3'))

# Pointwise convolution to squash 128 channels to 10 output channels.
# The 'rrl1' activation below is the layer the Grad-CAM cells read from.
model.add(Conv2D(10, 1, name='red1'))
model.add(BatchNormalization(name='red-BN1'))
model.add(Activation('relu', name='rrl1'))

# Last conv layer (7x7 kernel) - no ReLU activation, no batch normalization
model.add(Conv2D(10, 7, name='layer8'))  #47

# Flatten the output
model.add(Flatten())

# Softmax activation to output likelihood values for classes
model.add(Activation('softmax'))

# Print model summary
model.summary()
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr, decay=0.9, every=3, min_lr=0.0005):
    """Return the learning rate to use for ``epoch``.

    On every ``every``-th epoch (never on epoch 0) the rate is multiplied by
    ``decay`` and then clamped from below at ``min_lr``; on all other epochs
    it is passed through unchanged. The result is rounded to 10 decimals.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate.
        decay: Multiplicative factor applied on decay epochs.
        every: Decay interval in epochs; must be a positive integer.
        min_lr: Lower bound applied to the decayed rate.

    Returns:
        The new learning rate as a float rounded to 10 decimal places.
    """
    # `epoch` is falsy at 0, which keeps the very first epoch undecayed.
    if epoch and epoch % every == 0:
        lr = max(decay * lr, min_lr)
    return round(lr, 10)
# Callback that asks `scheduler` for the learning rate and logs it (verbose=1).
lr_scheduler = LearningRateScheduler(scheduler, verbose=1)
# Start with a higher lr of 0.003.
model.compile(
    optimizer=Adam(lr=0.003),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
from google.colab import drive
def mount_drive():
    """Mount Google Drive at /gdrive, requesting a forced remount."""
    drive.mount('/gdrive', force_remount=True)


# Mount right away so the checkpoint paths under /gdrive are available.
mount_drive()
from keras.callbacks import ModelCheckpoint
# Save the full model (not just weights) whenever validation accuracy improves.
chkpoint_model = ModelCheckpoint(
    "/gdrive/My Drive/EVA/Session9/model_customv1_cifar10_best.h5",
    monitor='val_acc', verbose=1, save_best_only=True,
    save_weights_only=False, mode='max')
from keras.preprocessing.image import ImageDataGenerator
# Basic augmentation: random zoom and horizontal flips.
datagen = ImageDataGenerator(zoom_range=0.15,
                             horizontal_flip=True)
batch_size = 128
# Train the model and time the run.
start = time.time()
# Keras 2 keywords: `steps_per_epoch` counts batches (not samples) and
# `epochs` replaces `nb_epoch`. Floor division gives whole batches per epoch.
# NOTE(review): this is believed to match what the legacy `samples_per_epoch`
# keyword resolved to - confirm against the installed Keras version.
model_info = model.fit_generator(
    datagen.flow(train_features, train_labels, batch_size=batch_size),
    steps_per_epoch=train_features.shape[0] // batch_size,
    epochs=100,
    validation_data=(test_features, test_labels),
    callbacks=[chkpoint_model, lr_scheduler],
    verbose=1)
end = time.time()
print("Model took %0.2f seconds to train\n" % (end - start))
from keras.models import load_model
# Reload the checkpoint file written by the ModelCheckpoint callback above.
model1 = load_model(
    filepath='/gdrive/My Drive/EVA/Session9/model_customv1_cifar10_best.h5'
)
Integrate Grad-CAM to visualize gradient heatmaps
We will integrate Grad-CAM to visualize where the network is looking, i.e. which pixels in the image contribute most to the prediction being made.
Choose 4 images from the test dataset, predict their classes, and print the Grad-CAM heatmap visualization for these 4 images.
import cv2
from mpl_toolkits.axes_grid1 import ImageGrid
from google.colab.patches import cv2_imshow
# Select test images and their one-hot labels to visualize with Grad-CAM.
sample_indices = [41, 410, 222, 950]
x = np.array([test_features[k] for k in sample_indices])
y = [test_labels[k] for k in sample_indices]
# Make predictions for the selected images.
preds = model1.predict(x)
# Activation layer just before the last 7x7 convolution; looked up once
# because it does not change between images.
last_conv_layer = model1.get_layer("rrl1")
for j in range(len(x)):
    # Explain the class the model predicted for image j.
    class_idx = np.argmax(preds[j])
    class_output = model1.output[:, class_idx]
    # Gradient of the class score w.r.t. the feature maps, averaged over the
    # batch and spatial axes to give one weight per channel.
    grads = K.gradients(class_output, last_conv_layer.output)[0]
    pooled_grads = K.mean(grads, axis=(0, 1, 2))
    iterate = K.function([model1.input], [pooled_grads, last_conv_layer.output[0]])
    # Feed ONLY image j. Feeding the whole batch averages the gradients over
    # every image and `output[0]` would always be the maps of the first image,
    # so the heatmap would not belong to the image it is drawn on.
    pooled_grads_value, conv_layer_output_value = iterate([x[j:j + 1]])
    # Weight each channel by its pooled gradient (broadcast over the last
    # axis, so the channel count is not hard-coded).
    conv_layer_output_value = conv_layer_output_value * pooled_grads_value
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    # Normalize to [0, 1]; skip when the map is all zeros to avoid NaNs.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    img = x[j]
    # Resize the coarse heatmap to the image size.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # Convert from BGR to RGB for matplotlib.
    heatmap1 = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    # Superimposed image for cv2-based display (cv2_imshow is supported in
    # colab). The image is rescaled to 0-255 and converted to BGR first so
    # that both inputs share the heatmap's value range and channel order.
    img_bgr = cv2.cvtColor(np.uint8(255 * img), cv2.COLOR_RGB2BGR)
    superimposed_img = cv2.addWeighted(img_bgr, 0.8, heatmap, 0.2, 0, dtype=cv2.CV_32F)
    # cv2.imshow does not work in jupyter notebooks and colab, so matplotlib
    # is used to show the image next to its heatmap overlay.
    fig = plt.figure(1, (5, 5))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(1, 2),
                     axes_pad=0.3,
                     )
    print(" original class is :"+class_names[np.argmax(y[j])]+" and predicted class is :"+str(class_names[class_idx]))
    grid[0].imshow(img)
    grid[0].set_title('Original')
    # Draw the original image and overlay the heatmap with alpha=0.7.
    grid[1].imshow(img, alpha=1)
    grid[1].imshow(heatmap1, alpha=0.7)
    grid[1].set_title('superimposed heatmap')
    plt.show()
# Predict the whole test set and split it into correct and wrong predictions.
pred = model1.predict(test_features)
pred2 = np.argmax(pred, axis=1)
# True class ids, decoded once from the one-hot labels.
actual_ids = np.argmax(test_labels, axis=1)
wrong_set = []      # images the model got wrong
correct_set = []    # images the model got right
wrong_labels = []   # predicted class name for each wrong image
true_labels = []    # true class name for each wrong image
wrong_indices = []  # position of each wrong image in the test set
# Iterate over however many test samples there are instead of a fixed 10000.
for i in range(len(test_features)):
    if pred2[i] == actual_ids[i]:
        correct_set.append(test_features[i])
    else:
        wrong_indices.append(i)
        wrong_labels.append(class_names[pred2[i]])
        true_labels.append(class_names[actual_ids[i]])
        wrong_set.append(test_features[i])
print(' Selection of 4 misclassified images \n _________________________________\n')
from mpl_toolkits.axes_grid1 import ImageGrid
# One row of four panels for the misclassified samples at positions 5..8.
fig = plt.figure(1, (12, 12))
grid = ImageGrid(fig, 111, nrows_ncols=(1, 4), axes_pad=1)
for slot, i in enumerate(range(5, 9)):
    panel = grid[slot]
    panel.imshow(wrong_set[i].reshape(32, 32, 3))
    # Title reads "<test index>: <true class>, predicted: <predicted class>".
    panel.set_title('{2}: {0}, predicted: {1}'.format(true_labels[i], wrong_labels[i], wrong_indices[i]))
plt.show()
# Grad-CAM for the misclassified test images at positions 5..8 of wrong_indices.
w_list = wrong_indices[5:9]
# Gather the images (as one numpy array) and their one-hot labels.
x = np.array([test_features[k] for k in w_list])
y = [test_labels[k] for k in w_list]
# Make predictions for these images.
preds = model1.predict(x)
# Activation layer just before the last 7x7 convolution; looked up once
# because it does not change between images.
last_conv_layer = model1.get_layer("rrl1")
for j in range(len(x)):
    # Explain the class the model predicted for image j.
    class_idx = np.argmax(preds[j])
    class_output = model1.output[:, class_idx]
    # Gradient of the class score w.r.t. the feature maps, averaged over the
    # batch and spatial axes to give one weight per channel.
    grads = K.gradients(class_output, last_conv_layer.output)[0]
    pooled_grads = K.mean(grads, axis=(0, 1, 2))
    iterate = K.function([model1.input], [pooled_grads, last_conv_layer.output[0]])
    # Feed ONLY image j. Feeding the whole batch averages the gradients over
    # every image and `output[0]` would always be the maps of the first image,
    # so the heatmap would not belong to the image it is drawn on.
    pooled_grads_value, conv_layer_output_value = iterate([x[j:j + 1]])
    # Weight each channel by its pooled gradient (broadcast over the last
    # axis, so the channel count is not hard-coded).
    conv_layer_output_value = conv_layer_output_value * pooled_grads_value
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    # Normalize to [0, 1]; skip when the map is all zeros to avoid NaNs.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    img = x[j]
    # Resize the coarse heatmap to the image size.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # Convert from BGR to RGB for matplotlib.
    heatmap1 = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    # Superimposed image for cv2-based display (cv2_imshow is supported in
    # colab). The image is rescaled to 0-255 and converted to BGR first so
    # that both inputs share the heatmap's value range and channel order.
    img_bgr = cv2.cvtColor(np.uint8(255 * img), cv2.COLOR_RGB2BGR)
    superimposed_img = cv2.addWeighted(img_bgr, 0.8, heatmap, 0.2, 0, dtype=cv2.CV_32F)
    # cv2.imshow does not work in jupyter notebooks and colab, so matplotlib
    # is used to show the image next to its heatmap overlay.
    fig = plt.figure(1, (5, 5))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(1, 2),
                     axes_pad=0.3,
                     )
    print(" original class is :"+class_names[np.argmax(y[j])]+" and predicted class is :"+str(class_names[class_idx]))
    grid[0].imshow(img)
    grid[0].set_title('Original')
    # Draw the original image and overlay the heatmap with alpha=0.7.
    grid[1].imshow(img, alpha=1)
    grid[1].imshow(heatmap1, alpha=0.7)
    grid[1].set_title('superimposed heatmap')
    plt.show()
We trained the model with some basic data augmentation techniques available in Keras and visualized the Grad-CAM heatmaps for a selection of 4 correctly classified images and 4 misclassified images. Let us now use another augmentation technique called cutout to train the model, see if it improves the prediction of these misclassified images, and also visualize where the model looks when making the prediction.
Cutout Augmentation
Cutout was first presented as an effective augmentation technique in these two papers:
"Improved Regularization of Convolutional Neural Networks with Cutout" and "Random Erasing Data Augmentation"
The idea is to randomly cut away patches of information from the images that a model is training on, to force it to learn from more parts of the image. This helps the model learn more features about a class instead of depending on some simple assumptions based on smaller areas within the image. This in turn helps the model generalize better and make better predictions.
We will use python code for cutout /random erasing found at https://github.com/yu4u/cutout-random-erasing
!wget https://raw.githubusercontent.com/yu4u/cutout-random-erasing/master/random_eraser.py
from keras.layers import Conv2D,BatchNormalization,MaxPooling2D,Activation,Flatten
# Define the model. Trailing "#n" comments are the author's receptive-field notes.
# `padding` is the Keras 2 name of the argument formerly called `border_mode`.
model = Sequential()
model.add(Conv2D(32, 3, padding='same', name='layer1', input_shape=(32, 32, 3)))  #3
model.add(BatchNormalization(name='BN1'))
model.add(Activation('relu', name='rl1'))

# Conv block 1
model.add(Conv2D(64, 3, name='layer2', padding='same'))  #5
model.add(BatchNormalization(name='BN2'))
model.add(Activation('relu', name='rl2'))

model.add(Conv2D(128, 3, name='layer3'))  #7
model.add(BatchNormalization(name='BN3'))
model.add(Activation('relu', name='rl3'))

# Dropout after conv block 1
model.add(Dropout(0.1, name='drp1'))

# Transition block 1: 1x1 convolution down to 32 channels, then 2x2 max pooling
model.add(Conv2D(32, 1, name='tb1'))
model.add(BatchNormalization(name='tb-BN1'))
model.add(Activation('relu', name='tb-rl1'))
model.add(MaxPooling2D(pool_size=(2, 2), name='mp1'))  #14

# Conv block 2
model.add(Conv2D(64, 3, name='layer4', padding='same'))  #16
model.add(BatchNormalization(name='BN4'))
model.add(Activation('relu', name='rl4'))

model.add(Conv2D(128, 3, name='layer5', padding='same'))  #18
model.add(BatchNormalization(name='BN5'))
model.add(Activation('relu', name='rl5'))

# Dropout after conv block 2
model.add(Dropout(0.1, name='drp2'))

# Transition block 2
model.add(Conv2D(32, 1, name='tb2'))
model.add(BatchNormalization(name='tb-BN2'))
model.add(Activation('relu', name='tb-rl2'))
model.add(MaxPooling2D(pool_size=(2, 2), name='mp2'))  #36 - we have reached the image size here

# Final conv block
model.add(Conv2D(64, 3, name='layer6', padding='same'))  #38
model.add(BatchNormalization(name='BN6'))
model.add(Activation('relu', name='rl6'))

model.add(Conv2D(128, 3, name='layer7', padding='same'))  #40
model.add(BatchNormalization(name='BN7'))
model.add(Activation('relu', name='rl7'))

# Dropout after final conv block
model.add(Dropout(0.1, name='d3'))

# Pointwise convolution to squash 128 channels to 10 output channels.
# The 'rrl1' activation below is the layer the Grad-CAM cells read from.
model.add(Conv2D(10, 1, name='red1'))
model.add(BatchNormalization(name='red-BN1'))
model.add(Activation('relu', name='rrl1'))

# Last conv layer (7x7 kernel) - no ReLU activation, no batch normalization
model.add(Conv2D(10, 7, name='layer8'))  #47

# Flatten the output
model.add(Flatten())

# Softmax activation to output likelihood values for classes
model.add(Activation('softmax'))

# Print model summary
model.summary()
# Compile the freshly built model with the same loss/optimizer settings as before.
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.003), metrics=['accuracy'])
# Save the full model (not just weights) whenever validation accuracy improves.
chkpoint_model = ModelCheckpoint(
    "/gdrive/My Drive/EVA/Session9/model3_with_cutout_cifar10_best.h5",
    monitor='val_acc', verbose=1, save_best_only=True,
    save_weights_only=False, mode='max')
from random_eraser import get_random_eraser
from keras.preprocessing.image import ImageDataGenerator
# Same zoom/flip augmentation as before, plus the random eraser (cutout) as a
# preprocessing function; v_l/v_h are passed as 0 and 1.
datagen = ImageDataGenerator(preprocessing_function=get_random_eraser(v_l=0, v_h=1),
                             zoom_range=0.15,
                             horizontal_flip=True)
batch_size = 128
# Train the model and time the run.
start = time.time()
# Keras 2 keywords: `steps_per_epoch` counts batches (not samples) and
# `epochs` replaces `nb_epoch`. Floor division gives whole batches per epoch.
# NOTE(review): this is believed to match what the legacy `samples_per_epoch`
# keyword resolved to - confirm against the installed Keras version.
model_info = model.fit_generator(
    datagen.flow(train_features, train_labels, batch_size=batch_size),
    steps_per_epoch=train_features.shape[0] // batch_size,
    epochs=100,
    validation_data=(test_features, test_labels),
    callbacks=[chkpoint_model, lr_scheduler],
    verbose=1)
end = time.time()
print("Model took %0.2f seconds to train\n" % (end - start))
# Reload the checkpoint file written during the cutout training run.
model1 = load_model(
    filepath='/gdrive/My Drive/EVA/Session9/model3_with_cutout_cifar10_best.h5'
)
# Select test images and their one-hot labels to visualize with Grad-CAM.
sample_indices = [41, 410, 222, 950]
x = np.array([test_features[k] for k in sample_indices])
y = [test_labels[k] for k in sample_indices]
# Make predictions for the selected images.
preds = model1.predict(x)
# Activation layer just before the last 7x7 convolution; looked up once
# because it does not change between images.
last_conv_layer = model1.get_layer("rrl1")
for j in range(len(x)):
    # Explain the class the model predicted for image j.
    class_idx = np.argmax(preds[j])
    class_output = model1.output[:, class_idx]
    # Gradient of the class score w.r.t. the feature maps, averaged over the
    # batch and spatial axes to give one weight per channel.
    grads = K.gradients(class_output, last_conv_layer.output)[0]
    pooled_grads = K.mean(grads, axis=(0, 1, 2))
    iterate = K.function([model1.input], [pooled_grads, last_conv_layer.output[0]])
    # Feed ONLY image j. Feeding the whole batch averages the gradients over
    # every image and `output[0]` would always be the maps of the first image,
    # so the heatmap would not belong to the image it is drawn on.
    pooled_grads_value, conv_layer_output_value = iterate([x[j:j + 1]])
    # Weight each channel by its pooled gradient (broadcast over the last
    # axis, so the channel count is not hard-coded).
    conv_layer_output_value = conv_layer_output_value * pooled_grads_value
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    # Normalize to [0, 1]; skip when the map is all zeros to avoid NaNs.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    img = x[j]
    # Resize the coarse heatmap to the image size.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # Convert from BGR to RGB for matplotlib.
    heatmap1 = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    # Superimposed image for cv2-based display (cv2_imshow is supported in
    # colab). The image is rescaled to 0-255 and converted to BGR first so
    # that both inputs share the heatmap's value range and channel order.
    img_bgr = cv2.cvtColor(np.uint8(255 * img), cv2.COLOR_RGB2BGR)
    superimposed_img = cv2.addWeighted(img_bgr, 0.8, heatmap, 0.2, 0, dtype=cv2.CV_32F)
    # cv2.imshow does not work in jupyter notebooks and colab, so matplotlib
    # is used to show the image next to its heatmap overlay.
    fig = plt.figure(1, (5, 5))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(1, 2),
                     axes_pad=0.3,
                     )
    print(" original class is :"+class_names[np.argmax(y[j])]+" and predicted class is :"+str(class_names[class_idx]))
    grid[0].imshow(img)
    grid[0].set_title('Original')
    # Draw the original image and overlay the heatmap with alpha=0.7.
    grid[1].imshow(img, alpha=1)
    grid[1].imshow(heatmap1, alpha=0.7)
    grid[1].set_title('superimposed heatmap')
    plt.show()
# Grad-CAM for the misclassified test images at positions 5..8 of wrong_indices.
w_list = wrong_indices[5:9]
# Gather the images (as one numpy array) and their one-hot labels.
x = np.array([test_features[k] for k in w_list])
y = [test_labels[k] for k in w_list]
# Make predictions for these images.
preds = model1.predict(x)
# Activation layer just before the last 7x7 convolution; looked up once
# because it does not change between images.
last_conv_layer = model1.get_layer("rrl1")
for j in range(len(x)):
    # Explain the class the model predicted for image j.
    class_idx = np.argmax(preds[j])
    class_output = model1.output[:, class_idx]
    # Gradient of the class score w.r.t. the feature maps, averaged over the
    # batch and spatial axes to give one weight per channel.
    grads = K.gradients(class_output, last_conv_layer.output)[0]
    pooled_grads = K.mean(grads, axis=(0, 1, 2))
    iterate = K.function([model1.input], [pooled_grads, last_conv_layer.output[0]])
    # Feed ONLY image j. Feeding the whole batch averages the gradients over
    # every image and `output[0]` would always be the maps of the first image,
    # so the heatmap would not belong to the image it is drawn on.
    pooled_grads_value, conv_layer_output_value = iterate([x[j:j + 1]])
    # Weight each channel by its pooled gradient (broadcast over the last
    # axis, so the channel count is not hard-coded).
    conv_layer_output_value = conv_layer_output_value * pooled_grads_value
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    # Normalize to [0, 1]; skip when the map is all zeros to avoid NaNs.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    img = x[j]
    # Resize the coarse heatmap to the image size.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # Convert from BGR to RGB for matplotlib.
    heatmap1 = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    # Superimposed image for cv2-based display (cv2_imshow is supported in
    # colab). The image is rescaled to 0-255 and converted to BGR first so
    # that both inputs share the heatmap's value range and channel order.
    img_bgr = cv2.cvtColor(np.uint8(255 * img), cv2.COLOR_RGB2BGR)
    superimposed_img = cv2.addWeighted(img_bgr, 0.8, heatmap, 0.2, 0, dtype=cv2.CV_32F)
    # cv2.imshow does not work in jupyter notebooks and colab, so matplotlib
    # is used to show the image next to its heatmap overlay.
    fig = plt.figure(1, (5, 5))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(1, 2),
                     axes_pad=0.3,
                     )
    print(" original class is :"+class_names[np.argmax(y[j])]+" and predicted class is :"+str(class_names[class_idx]))
    grid[0].imshow(img)
    grid[0].set_title('Original')
    # Draw the original image and overlay the heatmap with alpha=0.7.
    grid[1].imshow(img, alpha=1)
    grid[1].imshow(heatmap1, alpha=0.7)
    grid[1].set_title('superimposed heatmap')
    plt.show()