一個模型到底學到了什麼,一直是重要的研究課題,也是大家關心的問題。表徵學習、小樣本學習、資料偏差等議題,都會因為了解模型如何學習、最後學到什麼而受益,因此有不少人投入可解釋性(Explainability)的研究。今天介紹一個相關的技術:特徵圖視覺化(Feature Map Visualization)。
class FilterVisualizer():
    """Synthesise an image that maximises the mean activation of one filter.

    Starting from a small random image, the pixel values are optimised with
    Adam to increase the mean activation of a chosen filter, then the image
    is enlarged and the process is repeated ``upscaling_steps`` times.
    """

    def __init__(self, size=56, upscaling_steps=12, upscaling_factor=1.2):
        """Store the upscaling schedule and load the network.

        Args:
            size: Side length, in pixels, of the initial square image.
            upscaling_steps: Number of optimise-then-enlarge rounds.
            upscaling_factor: Factor the side length is multiplied by after
                each round (the result is truncated to an int).
        """
        self.size = size
        self.upscaling_steps = upscaling_steps
        self.upscaling_factor = upscaling_factor
        # The model is moved to the GPU and put in eval mode.
        # NOTE(review): ``pre=True`` presumably requests pretrained weights
        # -- confirm against the vgg16 helper.
        self.model = vgg16(pre=True).cuda().eval()
        # NOTE(review): set_trainable is defined elsewhere; presumably it
        # freezes the weights so only the image is optimised -- confirm.
        set_trainable(self.model, False)

    def visualize(self, layer, filter, lr=0.1, opt_steps=20, blur=None):
        """Optimise an image for one filter and save it to disk.

        The result of the last optimisation round (before the final
        enlargement) is kept in ``self.output`` and written via ``save``.

        Args:
            layer: Index into ``list(self.model.children())`` selecting the
                module whose output is recorded.
            filter: Index of the filter within the recorded output; the loss
                is the negated mean of ``features[0, filter]``.
            lr: Learning rate passed to Adam.
            opt_steps: Number of optimiser steps per round.
            blur: If not None, kernel side length passed to ``cv2.blur``
                after each enlargement.
        """
        sz = self.size
        # generate random image and register hook
        img = np.uint8(np.random.uniform(150, 180, (sz, sz, 3))) / 255
        # NOTE(review): SaveFeatures is defined elsewhere; it is expected to
        # expose the layer output as ``.features`` and to detach itself on
        # ``.close()`` -- confirm.
        activations = SaveFeatures(list(self.model.children())[layer])
        try:
            # scale the image up upscaling_steps times
            for _ in range(self.upscaling_steps):
                # Transforms are rebuilt each round because sz changes.
                _, val_tfms = tfms_from_model(vgg16, sz)
                # convert image to Variable that requires grad
                img_var = V(val_tfms(img)[None], requires_grad=True)
                optimizer = torch.optim.Adam(
                    [img_var], lr=lr, weight_decay=1e-6
                )
                # optimize pixel values for opt_steps times
                for _step in range(opt_steps):
                    optimizer.zero_grad()
                    # The forward pass is run only for its side effect of
                    # refreshing activations.features.
                    self.model(img_var)
                    # Negated so that minimising the loss maximises the
                    # filter's mean activation.
                    loss = -activations.features[0, filter].mean()
                    loss.backward()
                    optimizer.step()
                # Back to a channel-last numpy image.
                # NOTE(review): denorm presumably undoes the normalisation
                # applied by val_tfms -- confirm.
                img = val_tfms.denorm(
                    img_var.data.cpu().numpy()[0].transpose(1, 2, 0)
                )
                self.output = img
                # calculate new image size then scale image up
                sz = int(self.upscaling_factor * sz)
                img = cv2.resize(
                    img, (sz, sz), interpolation=cv2.INTER_CUBIC
                )
                # blur image to reduce high frequency patterns
                if blur is not None:
                    img = cv2.blur(img, (blur, blur))
            self.save(layer, filter)
        finally:
            # Always release the hook, even if optimisation or saving fails,
            # so it does not stay attached to the model.
            activations.close()

    def save(self, layer, filter):
        """Write ``self.output`` to ``layer_<layer>_filter_<filter>.jpg``.

        Pixel values are clipped to [0, 1] before saving. The path is
        relative, so the file lands in the current working directory.
        """
        plt.imsave(
            "layer_" + str(layer) + "_filter_" + str(filter) + ".jpg",
            np.clip(self.output, 0, 1),
        )
[1]:Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097-1105).
[2]:Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 580-587).
[3]:LeCun, Y., Boser, B. E., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W. E., & Jackel, L. D. (1990). Handwritten digit recognition with a back-propagation network. In Advances in neural information processing systems (pp. 396-404).
[4]:Zeiler, M. D., & Fergus, R. (2014, September). Visualizing and understanding convolutional networks. In Proceedings of the 13th European Conference on Computer Vision (ECCV), Zurich, Switzerland (pp. 818-833).
[5]:Yosinski, J., Clune, J., Nguyen, A., Fuchs, T., & Lipson, H. (2015). Understanding neural networks through deep visualization. arXiv preprint arXiv:1506.06579.
[6]:Zeiler, M. D., Taylor, G. W., & Fergus, R. (2011, November). Adaptive deconvolutional networks for mid and high level feature learning. In 2011 International Conference on Computer Vision (pp. 2018-2025). IEEE.