Source code for tf_keras_vis.gradcam_plus_plus

from typing import Union

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from scipy.ndimage.interpolation import zoom

from . import ModelVisualization
from .utils import is_mixed_precision, normalize, zoom_factor
from .utils.model_modifiers import ExtractIntermediateLayerForGradcam as ModelModifier


class GradcamPlusPlus(ModelVisualization):
    """Grad-CAM++

    References:
        * GradCAM++: Improved Visual Explanations for Deep Convolutional Networks
          (https://arxiv.org/pdf/1710.11063.pdf)
    """
    def __call__(self,
                 score,
                 seed_input,
                 penultimate_layer=None,
                 seek_penultimate_conv_layer=True,
                 gradient_modifier=None,
                 activation_modifier=lambda cam: K.relu(cam),
                 training=False,
                 expand_cam=True,
                 normalize_cam=True,
                 unconnected_gradients=tf.UnconnectedGradients.NONE) -> Union[np.ndarray, list]:
        """Generate gradient based class activation maps (CAM) by using positive gradient of
        penultimate_layer with respect to score.

        Args:
            score: A :obj:`tf_keras_vis.utils.scores.Score` instance, function or a list of them.
                For example of the Score instance to specify visualizing target::

                    scores = CategoricalScore([1, 294, 413])

                The code above means the same with the one below::

                    score = lambda outputs: (outputs[0][1], outputs[1][294], outputs[2][413])

                When the model has multiple outputs, you MUST pass a list of
                Score instances or functions. For example::

                    from tf_keras_vis.utils.scores import CategoricalScore, InactiveScore
                    score = [
                        CategoricalScore([1, 23]),  # For 1st model output
                        InactiveScore(),            # For 2nd model output
                        ...
                    ]

            seed_input: A tf.Tensor, :obj:`numpy.ndarray` or a list of them to input in the model.
                When the model has multiple inputs, you MUST pass a list of tensors.
            penultimate_layer: An index or name of the layer, or the tf.keras.layers.Layer
                instance itself. When None, it means the same with `-1`. If the layer specified by
                this option is not `convolutional` layer, `penultimate_layer` will work as the
                offset to seek `convolutional` layer. Defaults to None.
            seek_penultimate_conv_layer: A bool that indicates whether or not seeks a penultimate
                layer when the layer specified by `penultimate_layer` is not `convolutional` layer.
                Defaults to True.
            gradient_modifier: A function to modify gradients. Defaults to None.
            activation_modifier: A function to modify the Class Activation Map (CAM). It is
                applied both to the first-derivative weights and to the final CAM. Defaults to
                `lambda cam: K.relu(cam)`.
            training: A bool that indicates whether the model's training-mode on or off. Defaults
                to False.
            expand_cam: True to resize CAM to the same as input image size. **Note!** When False,
                even if the model has multiple inputs, return only a CAM. Defaults to True.
            normalize_cam: When True, CAM will be normalized. Defaults to True.
            unconnected_gradients: Specifies the gradient value returned when the given input
                tensors are unconnected. Defaults to tf.UnconnectedGradients.NONE.

        Returns:
            An :obj:`numpy.ndarray` or a list of them. They are the Class Activation Maps (CAMs)
            that indicate the `seed_input` regions whose change would most contribute the score
            value.

        Raises:
            :obj:`ValueError`: When there is any invalid arguments.
        """
        def inner_axes(tensor):
            # Every axis except the first and the last one
            # (presumably the spatial axes of a channels-last feature map -- TODO confirm).
            return tuple(range(1, len(tensor.shape) - 1))

        # Preparing: bring `score` and `seed_input` into the per-output / per-input form
        # expected below, using the helpers available on `self`.
        scores = self._get_scores_for_multiple_outputs(score)
        seed_inputs = self._get_seed_inputs_for_multiple_inputs(seed_input)

        # Processing gradcam: the modified model returns the regular outputs followed by the
        # penultimate layer's output as its last element.
        model = ModelModifier(penultimate_layer, seek_penultimate_conv_layer)(self.model)
        with tf.GradientTape(watch_accessed_variables=False) as tape:
            tape.watch(seed_inputs)
            outputs = model(seed_inputs, training=training)
            outputs, penultimate_output = outputs[:-1], outputs[-1]
            score_values = self._calculate_scores(outputs, scores)
        grads = tape.gradient(score_values,
                              penultimate_output,
                              unconnected_gradients=unconnected_gradients)

        # When mixed precision enabled
        if is_mixed_precision(model):
            grads = tf.cast(grads, dtype=model.variable_dtype)
            penultimate_output = tf.cast(penultimate_output, dtype=model.variable_dtype)
            score_values = [tf.cast(v, dtype=model.variable_dtype) for v in score_values]

        # exp(score), summed over all scores, reshaped so that it broadcasts against `grads`.
        score_values = sum(tf.math.exp(o) for o in score_values)
        score_values = tf.reshape(score_values, score_values.shape + (1, ) * (grads.ndim - 1))

        if gradient_modifier is not None:
            grads = gradient_modifier(grads)
        first_derivative = score_values * grads
        second_derivative = first_derivative * grads
        third_derivative = second_derivative * grads

        global_sum = K.sum(penultimate_output, axis=inner_axes(penultimate_output), keepdims=True)

        alpha_denom = second_derivative * 2.0 + third_derivative * global_sum
        # Replace exact zeros in the denominator by one. The guard must look at the denominator
        # itself: it can cancel to zero while `second_derivative` is non-zero, which would
        # otherwise yield inf/NaN weights. Wherever `second_derivative` is zero (finite values)
        # the denominator is zero as well, so previously handled inputs give the same result.
        alpha_denom = alpha_denom + tf.cast((alpha_denom == 0.0), alpha_denom.dtype)
        alphas = second_derivative / alpha_denom

        # Normalize the alphas over the inner axes, again avoiding a division by zero.
        alpha_normalization_constant = K.sum(alphas, axis=inner_axes(alphas), keepdims=True)
        alpha_normalization_constant = alpha_normalization_constant + tf.cast(
            (alpha_normalization_constant == 0.0), alpha_normalization_constant.dtype)
        alphas = alphas / alpha_normalization_constant

        if activation_modifier is None:
            weights = first_derivative
        else:
            weights = activation_modifier(first_derivative)
        deep_linearization_weights = weights * alphas
        deep_linearization_weights = K.sum(deep_linearization_weights,
                                           axis=inner_axes(deep_linearization_weights),
                                           keepdims=True)

        # Weighted sum of the penultimate output over its last axis.
        cam = K.sum(deep_linearization_weights * penultimate_output, axis=-1)
        if activation_modifier is not None:
            cam = activation_modifier(cam)

        if not expand_cam:
            # A single, un-resized CAM is returned regardless of the number of inputs.
            if normalize_cam:
                cam = normalize(cam)
            return cam

        # Visualizing: resize the CAM once per seed input so that it matches that input's shape.
        factors = (zoom_factor(cam.shape, X.shape) for X in seed_inputs)
        cam = [zoom(cam, factor, order=1) for factor in factors]
        if normalize_cam:
            cam = [normalize(x) for x in cam]
        # Unwrap the list when the caller passed a single (non-list) input to a single-input model.
        if len(self.model.inputs) == 1 and not isinstance(seed_input, list):
            cam = cam[0]
        return cam