Using Pre-trained Caffe model in ReNom

Loading the weights of pre-trained Caffe model into ReNom model.

A lot of pre-trained neural network models are available on Model Zoo . Using these models, we can easily reproduce the results of papers or use them for other tasks. The models are defined as caffemodel binary files, so we need to convert them to ReNom models.

In this tutorial, we introduce how to load the caffemodel into ReNom. As an example, we focus on the VGG network models[1]. VGG models are trained based on more than a million images and can be used for image classification tasks.

Required Libraries

For parsing caffemodel files, we need to install Caffe with Python interface. For installation, please see http://caffe.berkeleyvision.org/installation.html .
Please note that we’ll impletent the function to load caffemodel without Caffe soon. Until then, this converting procedure might be useful.
In [1]:
import caffe
import renom as rm
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

Define ReNom models

We define VGG models (VGG16/VGG19) as the function model of ReNom.
If you want to use only convolution layers of VGG, “without_top” models are used.
In [2]:
class VGG16(rm.Model):

    def __init__(self, classes=10):
        super(VGG16, self).__init__()
        self.conv1_1 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv1_2 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv2_1 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv2_2 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv3_1 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_2 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_3 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv4_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self._pool = rm.MaxPool2d(filter=2, stride=2)

        self.fc6 = rm.Dense(4096)
        self.fc7 = rm.Dense(4096)
        self.fc8 = rm.Dense(classes)
        self._dropout = rm.Dropout(dropout_ratio=0.5)

    def forward(self, x):
        c1_1 = rm.relu(self.conv1_1(x))
        c1_2 = rm.relu(self.conv1_2(c1_1))
        p1 = self._pool(c1_2)

        c2_1 = rm.relu(self.conv2_1(p1))
        c2_2 = rm.relu(self.conv2_2(c2_1))
        p2 = self._pool(c2_2)

        c3_1 = rm.relu(self.conv3_1(p2))
        c3_2 = rm.relu(self.conv3_2(c3_1))
        c3_3 = rm.relu(self.conv3_3(c3_2))
        p3 = self._pool(c3_3)

        c4_1 = rm.relu(self.conv4_1(p3))
        c4_2 = rm.relu(self.conv4_2(c4_1))
        c4_3 = rm.relu(self.conv4_3(c4_2))
        p4 = self._pool(c4_3)

        c5_1 = rm.relu(self.conv5_1(p4))
        c5_2 = rm.relu(self.conv5_2(c5_1))
        c5_3 = rm.relu(self.conv5_3(c5_2))
        p5 = self._pool(c5_3)

        fl = rm.flatten(p5)
        f6 = self._dropout(rm.relu(self.fc6(fl)))
        f7 = self._dropout(rm.relu(self.fc7(f6)))
        f8 = self.fc8(f7)
        return f8
In [3]:
class VGG16_without_top(rm.Model):

    def __init__(self, classes=10):
        super(VGG16_without_top, self).__init__()
        self.conv1_1 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv1_2 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv2_1 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv2_2 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv3_1 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_2 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_3 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv4_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self._pool = rm.MaxPool2d(filter=2, stride=2)

    def forward(self, x):
        c1_1 = rm.relu(self.conv1_1(x))
        c1_2 = rm.relu(self.conv1_2(c1_1))
        p1 = self._pool(c1_2)

        c2_1 = rm.relu(self.conv2_1(p1))
        c2_2 = rm.relu(self.conv2_2(c2_1))
        p2 = self._pool(c2_2)

        c3_1 = rm.relu(self.conv3_1(p2))
        c3_2 = rm.relu(self.conv3_2(c3_1))
        c3_3 = rm.relu(self.conv3_3(c3_2))
        p3 = self._pool(c3_3)

        c4_1 = rm.relu(self.conv4_1(p3))
        c4_2 = rm.relu(self.conv4_2(c4_1))
        c4_3 = rm.relu(self.conv4_3(c4_2))
        p4 = self._pool(c4_3)

        c5_1 = rm.relu(self.conv5_1(p4))
        c5_2 = rm.relu(self.conv5_2(c5_1))
        c5_3 = rm.relu(self.conv5_3(c5_2))
        p5 = self._pool(c5_3)

        return p5
In [4]:
class VGG19(rm.Model):

    def __init__(self, classes=10):
        super(VGG19, self).__init__()
        self.conv1_1 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv1_2 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv2_1 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv2_2 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv3_1 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_2 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_3 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_4 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv4_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_4 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_4 = rm.Conv2d(channel=512, padding=1, filter=3)
        self._pool = rm.MaxPool2d(filter=2, stride=2)

        self.fc6 = rm.Dense(4096)
        self.fc7 = rm.Dense(4096)
        self.fc8 = rm.Dense(classes)
        self._dropout = rm.Dropout(dropout_ratio=0.5)

    def forward(self, x):
        c1_1 = rm.relu(self.conv1_1(x))
        c1_2 = rm.relu(self.conv1_2(c1_1))
        p1 = self._pool(c1_2)

        c2_1 = rm.relu(self.conv2_1(p1))
        c2_2 = rm.relu(self.conv2_2(c2_1))
        p2 = self._pool(c2_2)

        c3_1 = rm.relu(self.conv3_1(p2))
        c3_2 = rm.relu(self.conv3_2(c3_1))
        c3_3 = rm.relu(self.conv3_3(c3_2))
        c3_4 = rm.relu(self.conv3_4(c3_3))
        p3 = self._pool(c3_4)

        c4_1 = rm.relu(self.conv4_1(p3))
        c4_2 = rm.relu(self.conv4_2(c4_1))
        c4_3 = rm.relu(self.conv4_3(c4_2))
        c4_4 = rm.relu(self.conv4_4(c4_3))
        p4 = self._pool(c4_4)

        c5_1 = rm.relu(self.conv5_1(p4))
        c5_2 = rm.relu(self.conv5_2(c5_1))
        c5_3 = rm.relu(self.conv5_3(c5_2))
        c5_4 = rm.relu(self.conv5_4(c5_3))
        p5 = self._pool(c5_4)

        fl = rm.flatten(p5)
        f6 = self._dropout(rm.relu(self.fc6(fl)))
        f7 = self._dropout(rm.relu(self.fc7(f6)))
        f8 = self.fc8(f7)
        return f8
In [5]:
class VGG19_without_top(rm.Model):

    def __init__(self, classes=10):
        super(VGG19_without_top, self).__init__()
        self.conv1_1 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv1_2 = rm.Conv2d(channel=64, padding=1, filter=3)
        self.conv2_1 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv2_2 = rm.Conv2d(channel=128, padding=1, filter=3)
        self.conv3_1 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_2 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_3 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv3_4 = rm.Conv2d(channel=256, padding=1, filter=3)
        self.conv4_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv4_4 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_1 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_2 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_3 = rm.Conv2d(channel=512, padding=1, filter=3)
        self.conv5_4 = rm.Conv2d(channel=512, padding=1, filter=3)
        self._pool = rm.MaxPool2d(filter=2, stride=2)

    def forward(self, x):
        c1_1 = rm.relu(self.conv1_1(x))
        c1_2 = rm.relu(self.conv1_2(c1_1))
        p1 = self._pool(c1_2)

        c2_1 = rm.relu(self.conv2_1(p1))
        c2_2 = rm.relu(self.conv2_2(c2_1))
        p2 = self._pool(c2_2)

        c3_1 = rm.relu(self.conv3_1(p2))
        c3_2 = rm.relu(self.conv3_2(c3_1))
        c3_3 = rm.relu(self.conv3_3(c3_2))
        c3_4 = rm.relu(self.conv3_4(c3_3))
        p3 = self._pool(c3_4)

        c4_1 = rm.relu(self.conv4_1(p3))
        c4_2 = rm.relu(self.conv4_2(c4_1))
        c4_3 = rm.relu(self.conv4_3(c4_2))
        c4_4 = rm.relu(self.conv4_4(c4_3))
        p4 = self._pool(c4_4)

        c5_1 = rm.relu(self.conv5_1(p4))
        c5_2 = rm.relu(self.conv5_2(c5_1))
        c5_3 = rm.relu(self.conv5_3(c5_2))
        c5_4 = rm.relu(self.conv5_4(c5_3))
        p5 = self._pool(c5_4)

        return p5

Convert caffemodel to ReNom model

In this step, we convert the downloaded caffemodels to above ReNom models. Please download “VGG_ILSVRC_16(19)_layers_deploy.prototxt” and “VGG_ILSVRC_16(19)_layers.caffemodel” from here in advance.

In [6]:
def caffe2renom_vgg(model="VGG16", with_top=True):
    if model == "VGG16":
        net = caffe.Net("./VGG_ILSVRC_16_layers_deploy.prototxt", caffe.TEST)
        net.copy_from("./VGG_ILSVRC_16_layers.caffemodel")
        if with_top:
            model = VGG16()
        else:
            model = VGG16_without_top()
    elif model == "VGG19":
        net = caffe.Net("./VGG_ILSVRC_19_layers_deploy.prototxt", caffe.TEST)
        net.copy_from("./VGG_ILSVRC_19_layers.caffemodel")
        if with_top:
            model = VGG19()
        else:
            model = VGG19_without_top()

    name_layers = net.params.keys()
    for name in name_layers:
        try:
            layer = getattr(model, name)
        except AttributeError:
            continue

        if name[:2] == "fc":
            layer.params['w'] = rm.Variable(net.params[name][0].data.T)
            layer.params['b'] = rm.Variable(net.params[name][1].\
                                    data[np.newaxis,:])
        else:
            layer.params['w'] = rm.Variable(net.params[name][0].data)
            layer.params['b'] = rm.Variable(net.params[name][1].\
                                data[np.newaxis,:,np.newaxis,np.newaxis])

    return model
In [7]:
model16 = caffe2renom_vgg(model="VGG16", with_top=True)
model16_without_top = caffe2renom_vgg(model="VGG16", with_top=False)
#model19 = caffe2renom_vgg(model="VGG19", with_top=True)
#model19_without_top = caffe2renom_vgg(model="VGG19", with_top=False)

Check the models loaded

For validation, we’ll do a classification test as below.

In [8]:
model16.set_models(inference=True)
#model19.set_models(inference=True)
In [9]:
# Load and Preprocessing the image.
img_test = Image.open("./cat.jpg")
img_test_array = np.asarray(img_test.resize((224, 224)), dtype='float32')
img_test_array[:,:,0] -= 103.939
img_test_array[:,:,1] -= 116.779
img_test_array[:,:,2] -= 123.68
img_test_array = img_test_array.transpose((2,0,1))
img_test_array = np.expand_dims(img_test_array, axis=0)

# Test pretrained model
pred16 = np.argmax(model16(img_test_array).as_ndarray(), axis=1)
#pred19 = np.argmax(model19(img_test_array).as_ndarray(), axis=1)
print(pred16)
#print(pred19)
plt.imshow(img_test)
plt.title('input image')
plt.axis('off')
plt.show()
[281]
[281]
../../../_images/notebooks_image_processing_caffe2renom_notebook_13_1.png

Label ID 281 corresponds to ‘tabby cat’ in ImageNet dataset, so the models are successfully converted.

Saving and Loading Models

Once we make ReNom models, we can save and load the weights as HDF5 data format.

In [10]:
# Saving
model16.save("weights_vgg16.h5")
model16_without_top.save("weights_vgg16_without_top.h5")
#model19.save("weights_vgg19.h5")
#model19_without_top.save("weights_vgg19_without_top.h5")
In [11]:
# Loading
model = VGG16()
model.load("weights_vgg16.h5")

If you want to know more detail, please see the tutorial Saving and Loading Models .

References

[1] K. Simonyan, A. Zisserman, Very Deep Convolutional Networks for Large-Scale Image Recognition, arXiv:1409.1556