Caffeの学習済みモデルの利用

学習済みCaffeモデルの重みをReNomモデルに読み込む

多くのニューラルネットワークの学習済みモデルが, Model Zoo において公開されています.これらの学習済みモデルを用いることで,私達は簡単に論文の結果を再現したり,他のタスクに利用したりすることができます.モデルはcaffemodelバイナリファイルで定義されており,ReNomで利用するためには変換をする必要があります.

このチュートリアルでは,VGGネットワークモデル[1]を例に,caffeで定義された学習済みモデルの重みをReNomにロードする方法を紹介します.VGGモデルは百万枚以上の画像を用いて学習されており,画像分類タスク等に利用することができます.

必要なライブラリ

  • matplotlib 2.0.2
  • numpy 1.12.1
  • pillow 4.2.1
  • caffe 1.0
caffemodelファイルを解析するため,CaffeをPythonインタフェース付きでインストールする必要があります.Caffeのインストールについては, http://caffe.berkeleyvision.org/installation.html を参照してください.
Caffeを必要とせずcaffemodelをロードする関数が近々ReNomに実装される予定です.それまでは,この変換手順に従って変換を行ってください.
In [1]:
import caffe
import renom as rm
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

ReNomモデルの定義

VGGモデル(VGG16/VGG19)をReNomのFunctionモデルとして定義します.
VGGの畳み込み層のみが必要な場合には,"without_top"モデルを使います.
In [2]:
class VGG16(rm.Model):
    """VGG16 network [1] implemented with ReNom layers.

    Thirteen 3x3 convolution layers arranged in five blocks (each block
    followed by 2x2 max pooling), then three fully connected layers.
    Attribute names (conv1_1 ... conv5_3, fc6-fc8) match the layer names
    in the Caffe prototxt so weights can be copied over by name.
    """

    def __init__(self, classes=10):
        """Create all layers.

        Args:
            classes (int): Number of output units of the last Dense layer.
        """
        super(VGG16, self).__init__()
        # (block index, output channels of each conv in the block).
        # All convolutions use 3x3 kernels with padding 1, which keeps
        # the spatial resolution unchanged inside a block.
        conv_cfg = [
            (1, (64, 64)),
            (2, (128, 128)),
            (3, (256, 256, 256)),
            (4, (512, 512, 512)),
            (5, (512, 512, 512)),
        ]
        for block, channels in conv_cfg:
            for idx, ch in enumerate(channels, start=1):
                setattr(self, "conv%d_%d" % (block, idx),
                        rm.Conv2d(channel=ch, padding=1, filter=3))
        # Shared 2x2/stride-2 pooling halves the spatial size after each block.
        self._pool = rm.MaxPool2d(filter=2, stride=2)

        self.fc6 = rm.Dense(4096)
        self.fc7 = rm.Dense(4096)
        self.fc8 = rm.Dense(classes)
        self._dropout = rm.Dropout(dropout_ratio=0.5)

    def forward(self, x):
        """Run a forward pass and return the raw class scores.

        Args:
            x: Input batch (channels-first image tensor).

        Returns:
            Output of the final Dense layer (no softmax applied).
        """
        conv_blocks = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3),
            (self.conv4_1, self.conv4_2, self.conv4_3),
            (self.conv5_1, self.conv5_2, self.conv5_3),
        )
        h = x
        for block in conv_blocks:
            for conv in block:
                h = rm.relu(conv(h))
            h = self._pool(h)

        h = rm.flatten(h)
        h = self._dropout(rm.relu(self.fc6(h)))
        h = self._dropout(rm.relu(self.fc7(h)))
        return self.fc8(h)
In [3]:
class VGG16_without_top(rm.Model):
    """Convolutional part of VGG16 only (no fully connected layers).

    Useful when VGG16 is used as a feature extractor. Attribute names
    match the Caffe layer names so pretrained weights load by name.
    """

    def __init__(self, classes=10):
        """Create the convolution/pooling layers.

        Args:
            classes (int): Kept for signature compatibility; unused because
                the classifier head is omitted.
        """
        super(VGG16_without_top, self).__init__()
        # (block index, output channels of each conv in the block);
        # 3x3 kernels with padding 1 preserve the spatial size.
        conv_cfg = [
            (1, (64, 64)),
            (2, (128, 128)),
            (3, (256, 256, 256)),
            (4, (512, 512, 512)),
            (5, (512, 512, 512)),
        ]
        for block, channels in conv_cfg:
            for idx, ch in enumerate(channels, start=1):
                setattr(self, "conv%d_%d" % (block, idx),
                        rm.Conv2d(channel=ch, padding=1, filter=3))
        self._pool = rm.MaxPool2d(filter=2, stride=2)

    def forward(self, x):
        """Return the pooled feature map after the last conv block."""
        conv_blocks = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3),
            (self.conv4_1, self.conv4_2, self.conv4_3),
            (self.conv5_1, self.conv5_2, self.conv5_3),
        )
        h = x
        for block in conv_blocks:
            for conv in block:
                h = rm.relu(conv(h))
            h = self._pool(h)
        return h
In [4]:
class VGG19(rm.Model):
    """VGG19 network [1] implemented with ReNom layers.

    Sixteen 3x3 convolution layers in five blocks (blocks 3-5 have four
    convolutions each), each block followed by 2x2 max pooling, then
    three fully connected layers. Attribute names match the Caffe layer
    names so pretrained weights can be copied over by name.
    """

    def __init__(self, classes=10):
        """Create all layers.

        Args:
            classes (int): Number of output units of the last Dense layer.
        """
        super(VGG19, self).__init__()
        # (block index, output channels of each conv in the block);
        # 3x3 kernels with padding 1 preserve the spatial size.
        conv_cfg = [
            (1, (64, 64)),
            (2, (128, 128)),
            (3, (256, 256, 256, 256)),
            (4, (512, 512, 512, 512)),
            (5, (512, 512, 512, 512)),
        ]
        for block, channels in conv_cfg:
            for idx, ch in enumerate(channels, start=1):
                setattr(self, "conv%d_%d" % (block, idx),
                        rm.Conv2d(channel=ch, padding=1, filter=3))
        self._pool = rm.MaxPool2d(filter=2, stride=2)

        self.fc6 = rm.Dense(4096)
        self.fc7 = rm.Dense(4096)
        self.fc8 = rm.Dense(classes)
        self._dropout = rm.Dropout(dropout_ratio=0.5)

    def forward(self, x):
        """Run a forward pass and return the raw class scores.

        Args:
            x: Input batch (channels-first image tensor).

        Returns:
            Output of the final Dense layer (no softmax applied).
        """
        conv_blocks = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4),
            (self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4),
            (self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4),
        )
        h = x
        for block in conv_blocks:
            for conv in block:
                h = rm.relu(conv(h))
            h = self._pool(h)

        h = rm.flatten(h)
        h = self._dropout(rm.relu(self.fc6(h)))
        h = self._dropout(rm.relu(self.fc7(h)))
        return self.fc8(h)
In [5]:
class VGG19_without_top(rm.Model):
    """Convolutional part of VGG19 only (no fully connected layers).

    Useful when VGG19 is used as a feature extractor. Attribute names
    match the Caffe layer names so pretrained weights load by name.
    """

    def __init__(self, classes=10):
        """Create the convolution/pooling layers.

        Args:
            classes (int): Kept for signature compatibility; unused because
                the classifier head is omitted.
        """
        super(VGG19_without_top, self).__init__()
        # (block index, output channels of each conv in the block);
        # 3x3 kernels with padding 1 preserve the spatial size.
        conv_cfg = [
            (1, (64, 64)),
            (2, (128, 128)),
            (3, (256, 256, 256, 256)),
            (4, (512, 512, 512, 512)),
            (5, (512, 512, 512, 512)),
        ]
        for block, channels in conv_cfg:
            for idx, ch in enumerate(channels, start=1):
                setattr(self, "conv%d_%d" % (block, idx),
                        rm.Conv2d(channel=ch, padding=1, filter=3))
        self._pool = rm.MaxPool2d(filter=2, stride=2)

    def forward(self, x):
        """Return the pooled feature map after the last conv block."""
        conv_blocks = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4),
            (self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4),
            (self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4),
        )
        h = x
        for block in conv_blocks:
            for conv in block:
                h = rm.relu(conv(h))
            h = self._pool(h)
        return h

caffemodelのReNomモデルへの変換

このステップでは,ダウンロードしたcaffemodelを上記のReNomモデルに変換します.予め,"VGG_ILSVRC_16(19)_layers_deploy.prototxt"と"VGG_ILSVRC_16(19)_layers.caffemodel"を こちら からダウンロードしてください.

In [6]:
def caffe2renom_vgg(model="VGG16", with_top=True):
    """Load pretrained VGG weights from a caffemodel into a ReNom model.

    Parses the Caffe network definition, then copies each named layer's
    weight and bias arrays into the ReNom layer with the same attribute
    name. Expects the prototxt/caffemodel files in the current directory.

    Args:
        model (str): Either "VGG16" or "VGG19".
        with_top (bool): If True, include the fully connected layers
            (fc6-fc8); otherwise build the "without_top" variant.

    Returns:
        The ReNom model with weights copied from the caffemodel.

    Raises:
        ValueError: If ``model`` is not "VGG16" or "VGG19".
    """
    if model == "VGG16":
        net = caffe.Net("./VGG_ILSVRC_16_layers_deploy.prototxt", caffe.TEST)
        net.copy_from("./VGG_ILSVRC_16_layers.caffemodel")
        renom_model = VGG16() if with_top else VGG16_without_top()
    elif model == "VGG19":
        net = caffe.Net("./VGG_ILSVRC_19_layers_deploy.prototxt", caffe.TEST)
        net.copy_from("./VGG_ILSVRC_19_layers.caffemodel")
        renom_model = VGG19() if with_top else VGG19_without_top()
    else:
        # Fail fast: the original fell through and crashed later with a
        # confusing NameError on `net` when given an unknown model name.
        raise ValueError(
            "model must be 'VGG16' or 'VGG19', got %r" % (model,))

    for name in net.params.keys():
        try:
            layer = getattr(renom_model, name)
        except AttributeError:
            # Layer exists in the caffemodel but not in our model
            # (e.g. fc layers when with_top=False) -- skip it.
            continue

        # net.params[name][0] is the weight blob, [1] is the bias blob.
        if name[:2] == "fc":
            # Dense: transpose the weight matrix to match ReNom's layout,
            # and give the bias a leading batch axis for broadcasting.
            layer.params['w'] = rm.Variable(net.params[name][0].data.T)
            layer.params['b'] = rm.Variable(
                net.params[name][1].data[np.newaxis, :])
        else:
            # Conv: weights are copied as-is; the bias is reshaped to
            # (1, channels, 1, 1) so it broadcasts over NCHW feature maps.
            layer.params['w'] = rm.Variable(net.params[name][0].data)
            layer.params['b'] = rm.Variable(
                net.params[name][1].data[np.newaxis, :, np.newaxis, np.newaxis])

    return renom_model
In [7]:
# Convert the downloaded caffemodel weights into ReNom VGG16 models
# (with and without the fully connected top). Uncomment the VGG19 lines
# after downloading the corresponding prototxt/caffemodel files.
model16 = caffe2renom_vgg(model="VGG16", with_top=True)
model16_without_top = caffe2renom_vgg(model="VGG16", with_top=False)
#model19 = caffe2renom_vgg(model="VGG19", with_top=True)
#model19_without_top = caffe2renom_vgg(model="VGG19", with_top=False)

読み込んだモデルの確認

モデルの検証のため,下記のように分類のテストを行います.

In [8]:
# Switch the model to inference mode so Dropout is disabled for prediction.
model16.set_models(inference=True)
#model19.set_models(inference=True)
In [9]:
# Load the test image and preprocess it for VGG: resize to 224x224,
# subtract per-channel mean values, and reorder to (batch, channel, H, W).
img_test = Image.open("./cat.jpg")
img_test_array = np.asarray(img_test.resize((224, 224)), dtype='float32')
# Mean values correspond to the dataset means used to train the Caffe VGG
# models. NOTE(review): PIL loads images in RGB order while the original
# Caffe VGG was trained on BGR input -- confirm whether a channel swap
# is needed before subtracting these means.
img_test_array[:,:,0] -= 103.939
img_test_array[:,:,1] -= 116.779
img_test_array[:,:,2] -= 123.68
# HWC -> CHW, then add a leading batch axis of size 1.
img_test_array = img_test_array.transpose((2,0,1))
img_test_array = np.expand_dims(img_test_array, axis=0)

# Run the converted model and take the arg-max class index per sample.
pred16 = np.argmax(model16(img_test_array).as_ndarray(), axis=1)
#pred19 = np.argmax(model19(img_test_array).as_ndarray(), axis=1)
print(pred16)
#print(pred19)
plt.imshow(img_test)
plt.title('input image')
plt.axis('off')
plt.show()
[281]
[281]
../../../_images/notebooks_image_processing_caffe2renom_notebook_13_1.png

ラベルID281はImageNetデータセットにおいて’tabby cat’(トラネコ)を示すので,モデルは正常に変換できているようです.

モデルの保存と読み込み

一旦ReNomモデルを作成してしまえば,HDF5データフォーマットによって重みを保存したりロードしたりすることができます.

In [10]:
# Save the converted weights in HDF5 format so Caffe is no longer needed
# to reload them later.
model16.save("weights_vgg16.h5")
model16_without_top.save("weights_vgg16_without_top.h5")
#model19.save("weights_vgg19.h5")
#model19_without_top.save("weights_vgg19_without_top.h5")
In [11]:
# Reload the saved weights into a freshly constructed model; layer
# attribute names must match those used when the file was saved.
model = VGG16()
model.load("weights_vgg16.h5")

より詳しく知りたい方は, 重みパラメータの保存と読み込み を参照してください.

参考文献

[1] K. Simonyan, A. Zisserman, Very Deep Convolutional Networks for Large-Scale Image Recognition, arXiv:1409.1556