写在前面
上一篇文章介绍了如何使用Keras实现经典的AlexNet(12年ImageNet冠军),VGGNet也是最经典的网络结构之一,获得了14年的ImageNet亚军。VGGNet的结构非常简洁,整个网络都使用了同样大小的3*3卷积核尺寸和2*2最大池化尺寸。
技术要点
一、使用了更小的3*3卷积核,和更深的网络。两个3*3卷积核的堆叠相当于5*5卷积核的视野,三个3*3卷积核的堆叠相当于7*7卷积核的视野。这样一方面可以有更少的参数(在通道数相同的情况下,3个堆叠的3*3结构只有7*7结构参数数量的(3*3*3)/(7*7)≈55%);另一方面拥有更多的非线性变换,增加了CNN对特征的学习能力。
二、在VGGNet的C结构中,引入1*1的卷积核,在不影响输入输出维度的情况下,引入非线性变换,增加网络的表达能力。
三、训练时,先训练级别简单的VGGNet的A级网络,然后使用A网络的权重来初始化后面的复杂模型,加快训练的收敛速度。
四、采用了Multi-Scale的方法来训练和预测。可以增加训练的数据量,防止模型过拟合,提升预测准确率。
网络结构
代码
#-*- coding: UTF-8 -*-
"""
Author: lanbing510
Environment: Keras2.0.5,Python2.7
Model: VGGNet-19
"""
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Flatten, Dense, Dropout
from keras.layers import Input
from keras.models import Model
from keras import regularizers
from keras.utils import plot_model
from KerasLayers.Custom_layers import LRN2D
# Global Constants
# Number of output classes; used as the width of the final softmax Dense layer.
NB_CLASS=1000
# NOTE(review): LEARNING_RATE and MOMENTUM are not referenced anywhere in the
# visible code (the model is compiled with the string optimizer 'rmsprop').
LEARNING_RATE=0.01
MOMENTUM=0.9
# alpha / beta arguments passed to the LRN2D layer in conv2D_lrn2d.
ALPHA=0.0001
BETA=0.75
# NOTE(review): GAMMA is not referenced anywhere in the visible code.
GAMMA=0.1
# Rate passed to the Dropout layers that follow the two 4096-unit Dense layers.
DROPOUT=0.5
# Default L2 factor applied to conv kernels and biases by conv2D_lrn2d.
WEIGHT_DECAY=0.0005
# Default for conv2D_lrn2d's lrn2d_norm flag (append an LRN2D layer after the conv).
LRN2D_NORM=True
# Tensor layout used for the input shape and for every conv / pooling layer.
DATA_FORMAT='channels_last' # Theano:'channels_first' Tensorflow:'channels_last'
def conv2D_lrn2d(x, filters, kernel_size, strides=(1, 1), padding='same',
                 data_format=DATA_FORMAT, dilation_rate=(1, 1),
                 activation='relu', use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros', kernel_regularizer=None,
                 bias_regularizer=None, activity_regularizer=None,
                 kernel_constraint=None, bias_constraint=None,
                 lrn2d_norm=LRN2D_NORM, weight_decay=WEIGHT_DECAY):
    """Apply a Conv2D layer to ``x``, optionally followed by an LRN2D layer.

    All arguments from ``filters`` through ``bias_constraint`` are forwarded
    unchanged to ``keras.layers.Conv2D``, except for the two regularizers,
    which are resolved as described below.

    Args:
        x: Input tensor the convolution is applied to.
        kernel_regularizer: Regularizer for the conv kernel. When left as
            ``None`` and ``weight_decay`` is truthy, an L2 regularizer with
            factor ``weight_decay`` is used instead.
        bias_regularizer: Regularizer for the conv bias; resolved the same
            way as ``kernel_regularizer``.
        lrn2d_norm: When truthy, append ``LRN2D(alpha=ALPHA, beta=BETA)``
            after the convolution.
        weight_decay: L2 factor used as the fallback regularizer. A falsy
            value (``0``, ``None``) disables the fallback.

    Returns:
        The output tensor of the convolution (and of LRN2D when enabled).
    """
    # An explicitly supplied regularizer wins; weight_decay only fills the
    # gaps. Previously both arguments were overwritten unconditionally, so a
    # caller-provided regularizer was silently discarded.
    if weight_decay:
        if kernel_regularizer is None:
            kernel_regularizer = regularizers.l2(weight_decay)
        if bias_regularizer is None:
            bias_regularizer = regularizers.l2(weight_decay)

    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides,
               padding=padding,
               data_format=data_format,
               dilation_rate=dilation_rate,
               activation=activation,
               use_bias=use_bias,
               kernel_initializer=kernel_initializer,
               bias_initializer=bias_initializer,
               kernel_regularizer=kernel_regularizer,
               bias_regularizer=bias_regularizer,
               activity_regularizer=activity_regularizer,
               kernel_constraint=kernel_constraint,
               bias_constraint=bias_constraint)(x)

    if lrn2d_norm:
        x = LRN2D(alpha=ALPHA, beta=BETA)(x)

    return x
def create_model():
    """Build the VGGNet-19 layer graph for 224x224 RGB input.

    The network is five convolutional stages (2, 2, 4, 4 and 4 3x3
    convolutions with 64, 128, 256, 512 and 512 filters), each followed by a
    2x2 max-pooling layer, and then three fully connected layers.

    Returns:
        A tuple ``(x, img_input, CONCAT_AXIS, INP_SHAPE, DATA_FORMAT)`` where
        ``x`` is the softmax output tensor, ``img_input`` is the Keras input
        tensor, ``CONCAT_AXIS`` is the channel axis index (1 or 3),
        ``INP_SHAPE`` is the input shape without the batch dimension, and
        ``DATA_FORMAT`` is the module-level data format string.

    Raises:
        ValueError: If ``DATA_FORMAT`` is neither ``'channels_first'`` nor
            ``'channels_last'``.
    """
    if DATA_FORMAT == 'channels_first':
        INP_SHAPE = (3, 224, 224)
        CONCAT_AXIS = 1
    elif DATA_FORMAT == 'channels_last':
        INP_SHAPE = (224, 224, 3)
        CONCAT_AXIS = 3
    else:
        # The original message referenced an undefined name (DIM_ORDERING),
        # which turned this branch into a NameError.
        raise ValueError('Invalid Dim Ordering: ' + str(DATA_FORMAT))
    img_input = Input(shape=INP_SHAPE)

    # (filters, number of stacked 3x3 convolutions) for conv layers 1-16.
    conv_stages = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    x = img_input
    for filters, nb_conv in conv_stages:
        for _ in range(nb_conv):
            x = conv2D_lrn2d(x, filters, (3, 3), 1, padding='same',
                             lrn2d_norm=False)
        # Each stage ends with a 2x2, stride-2 max pooling.
        x = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid',
                         data_format=DATA_FORMAT)(x)

    # Fully Connected Layer 17
    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(DROPOUT)(x)

    # Fully Connected Layer 18
    x = Dense(4096, activation='relu')(x)
    x = Dropout(DROPOUT)(x)

    # Fully Connected Layer 19 (classifier); `units` is the Keras 2 name of
    # the deprecated `output_dim` argument.
    x = Dense(units=NB_CLASS, activation='softmax')(x)

    return x, img_input, CONCAT_AXIS, INP_SHAPE, DATA_FORMAT
def check_print():
    """Build the VGGNet-19 model, print its summary, plot it and compile it.

    Side effects: prints the layer summary to stdout, writes the model
    diagram to ``VGGNet.png`` in the current directory, and prints
    ``Model Compiled`` once compilation has finished.
    """
    # Only the output and input tensors are needed here; the remaining
    # elements of the tuple are ignored.
    x, img_input, _, _, _ = create_model()

    # `inputs` / `outputs` are the Keras 2 names of the deprecated
    # `input` / `output` keyword arguments.
    model = Model(inputs=img_input, outputs=[x])
    model.summary()

    # Save a PNG of the model graph.
    plot_model(model, to_file='VGGNet.png')

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    # Parenthesised form prints the same text on Python 2.7 and Python 3.
    print('Model Compiled')
# Build, summarize, plot and compile the model only when run as a script.
if __name__=='__main__':
    check_print()
参考文献
[1] Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
[2] VGGNet Project