# 正規化と標準化 ¶

特徴量ごとにスケールが大きく異なると、損失関数のerror surfaceが細長く歪んだ形になり、勾配法による重みの更新が最小値の方向へまっすぐ進みにくくなります。しかし、正規化や標準化を用いることで、図にあるようなerror surfaceの形を円に近づけることができ、勾配法によって損失関数が最小化される方向へ正しく重みを更新できることが期待されます。

## Required Libraries ¶

• matplotlib 2.0.2
• numpy 1.12.1
• scikit-learn 0.18.2
• pandas 0.20.3
from __future__ import division, print_function
import numpy as np
import pandas as pd

import renom as rm
from renom.cuda import set_cuda_active
from sklearn.preprocessing import LabelBinarizer, label_binarize
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Pass True to enable GPU computing through ReNom's CUDA support; pass False otherwise.
set_cuda_active(False)


## Make Data ¶

The dataset used in this tutorial is cited below.

MAGIC Gamma Telescope Data Set, R. K. Bock Major Atmospheric Gamma Imaging Cherenkov Telescope project (MAGIC),
P. Savicky Institute of Computer Science, AS of CR Czech Republic.
# Read the raw CSV; the file carries no header row, so columns are numbered.
df = pd.read_csv("magic04.data", header=None)

# Every column except column 10 is used as a feature.
feature_frame = df.drop(10, axis=1)
X = feature_frame.values.astype(float)

# Column 10 is the class label: "g" is encoded as 1 and "h" as 0,
# then reshaped into a single-column array.
label_series = df.iloc[:, 10]
label_series = label_series.replace("g", 1)
label_series = label_series.replace("h", 0)
y = label_series.values.astype(float).reshape(-1, 1)

print("X:{} y:{}".format(X.shape, y.shape))

X:(19020, 10) y:(19020, 1)

# Hold out 20% of the samples as the test split.
split = train_test_split(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = split

# Report the shape of each resulting array.
shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print("X_train:{} y_train:{} X_test:{} y_test:{}".format(*shapes))

X_train:(15216, 10) y_train:(15216, 1) X_test:(3804, 10) y_test:(3804, 1)


## 標準化をしないケース ¶

# Network layout: Dense(64) -> ReLU -> Dense(32) -> ReLU -> Dense(1).
# The last layer emits one raw value per sample.
layers = [
    rm.Dense(64),
    rm.Relu(),
    rm.Dense(32),
    rm.Relu(),
    rm.Dense(1),
]
sequential = rm.Sequential(layers)

# Mini-batch SGD training of `sequential` on X_train / y_train, reporting the
# sigmoid cross-entropy on both the training batches and X_test every epoch.
batch_size = 128
epoch = 15
N = len(X_train)
# Sgd is reached through the already-imported `rm` namespace; the bare name
# `Sgd` is never imported in this file and would raise NameError.
optimizer = rm.Sgd(lr=0.01)
learning_curve = []
test_learning_curve = []

for i in range(epoch):
    # Draw a fresh random ordering of the training samples each epoch.
    perm = np.random.permutation(N)
    loss = 0
    # Only full mini-batches are visited; the trailing N % batch_size samples
    # of each permutation are skipped.
    for j in range(0, N // batch_size):
        batch_index = perm[j*batch_size : (j+1)*batch_size]
        train_batch = X_train[batch_index]
        response_batch = y_train[batch_index]

        # Run the forward pass inside the model's training context.
        with sequential.train():
            z = sequential(train_batch)
            l = rm.sigmoid_cross_entropy(z, response_batch)
        # Backpropagate the batch loss and apply the optimizer step. Without
        # this call the weights are never updated and the loss cannot improve.
        l.grad().update(optimizer)
        loss += l.as_ndarray()
    # Average the accumulated loss over the number of batches processed.
    train_loss = loss / (N // batch_size)
    # Evaluate on the held-out split outside the training context.
    z_test = sequential(X_test)
    test_loss = rm.sigmoid_cross_entropy(z_test, y_test).as_ndarray()
    test_learning_curve.append(test_loss)
    learning_curve.append(train_loss)
    print("epoch:{:03d}, train_loss:{:.4f}, test_loss:{:.4f}".format(i, float(train_loss), float(test_loss)))

epoch:000, train_loss:0.6906, test_loss:0.5011
epoch:001, train_loss:0.4930, test_loss:0.4914
epoch:002, train_loss:0.4805, test_loss:0.4932
epoch:003, train_loss:0.4707, test_loss:0.4717
epoch:004, train_loss:0.4669, test_loss:0.4788
epoch:005, train_loss:0.4612, test_loss:0.4841
epoch:006, train_loss:0.4542, test_loss:0.4646
epoch:007, train_loss:0.4510, test_loss:0.4687
epoch:008, train_loss:0.4503, test_loss:0.4518
epoch:009, train_loss:0.4425, test_loss:0.4446
epoch:010, train_loss:0.4415, test_loss:0.4811
epoch:011, train_loss:0.4392, test_loss:0.4473
epoch:012, train_loss:0.4357, test_loss:0.4441
epoch:013, train_loss:0.4341, test_loss:0.4414
epoch:014, train_loss:0.4351, test_loss:0.4362


## 標準化をするケース ¶

$$X_{new} = \frac{X_{old} - X_{mean}}{X_{std}}$$
# Standardize each feature column to zero mean and unit variance.
# The mean and standard deviation are computed from the training split only
# and then reused for the test split.
X_train_mean = np.mean(X_train, axis=0)
X_train_std = np.std(X_train, axis=0)
# A constant feature has a standard deviation of 0, which would turn the
# division into NaN/inf. Divide such columns by 1 so they become 0 instead.
X_train_scale = np.where(X_train_std == 0, 1.0, X_train_std)
X_train = (X_train - X_train_mean) / X_train_scale

X_test = (X_test - X_train_mean) / X_train_scale
print(X_train)

[[-0.58201867 -0.1658305  -0.18455665 ..., -0.59053373  2.37721221
-1.23456824]
[ 0.13326953  0.22006933  1.50925445 ..., -0.84979314 -0.98602078
0.17031323]
[-0.75331084 -0.42985235 -0.35030862 ..., -0.16949744 -1.02206549
0.58497995]
...,
[-0.78272801 -0.80924514 -1.47088532 ...,  0.2453504  -0.67826979
-1.19999152]
[-0.15077038  0.77937076  1.84861092 ...,  0.67049787 -0.88432171
-1.11888926]
[-0.76934393 -0.36479821 -0.26011069 ..., -0.43940831 -0.01727761
-1.95602149]]

# Build a fresh network so training starts from newly created layers:
# Dense(64) -> ReLU -> Dense(32) -> ReLU -> Dense(1).
layers = [
    rm.Dense(64),
    rm.Relu(),
    rm.Dense(32),
    rm.Relu(),
    rm.Dense(1),
]
sequential = rm.Sequential(layers)

# Mini-batch SGD training of `sequential` on the current X_train / y_train,
# reporting the sigmoid cross-entropy on both the training batches and X_test
# every epoch.
batch_size = 128
epoch = 15
N = len(X_train)
# Sgd is reached through the already-imported `rm` namespace; the bare name
# `Sgd` is never imported in this file and would raise NameError.
optimizer = rm.Sgd(lr=0.01)
learning_curve = []
test_learning_curve = []

for i in range(epoch):
    # Draw a fresh random ordering of the training samples each epoch.
    perm = np.random.permutation(N)
    loss = 0
    # Only full mini-batches are visited; the trailing N % batch_size samples
    # of each permutation are skipped.
    for j in range(0, N // batch_size):
        batch_index = perm[j*batch_size : (j+1)*batch_size]
        train_batch = X_train[batch_index]
        response_batch = y_train[batch_index]

        # Run the forward pass inside the model's training context.
        with sequential.train():
            z = sequential(train_batch)
            l = rm.sigmoid_cross_entropy(z, response_batch)
        # Backpropagate the batch loss and apply the optimizer step. Without
        # this call the weights are never updated and the loss cannot improve.
        l.grad().update(optimizer)
        loss += l.as_ndarray()
    # Average the accumulated loss over the number of batches processed.
    train_loss = loss / (N // batch_size)
    # Evaluate on the held-out split outside the training context.
    z_test = sequential(X_test)
    test_loss = rm.sigmoid_cross_entropy(z_test, y_test).as_ndarray()
    test_learning_curve.append(test_loss)
    learning_curve.append(train_loss)
    print("epoch:{:03d}, train_loss:{:.4f}, test_loss:{:.4f}".format(i, float(train_loss), float(test_loss)))

epoch:000, train_loss:0.6376, test_loss:0.5432
epoch:001, train_loss:0.5090, test_loss:0.4764
epoch:002, train_loss:0.4617, test_loss:0.4439
epoch:003, train_loss:0.4365, test_loss:0.4244
epoch:004, train_loss:0.4204, test_loss:0.4116
epoch:005, train_loss:0.4090, test_loss:0.4021
epoch:006, train_loss:0.3999, test_loss:0.3945
epoch:007, train_loss:0.3930, test_loss:0.3881
epoch:008, train_loss:0.3872, test_loss:0.3831
epoch:009, train_loss:0.3817, test_loss:0.3784
epoch:010, train_loss:0.3761, test_loss:0.3747
epoch:011, train_loss:0.3717, test_loss:0.3709
epoch:012, train_loss:0.3687, test_loss:0.3675
epoch:013, train_loss:0.3648, test_loss:0.3647
epoch:014, train_loss:0.3614, test_loss:0.3618

2つの例を比べて、標準化を使ったケースのほうが損失が下がるスピードが若干速かったかと思います。