tensorflow使用较为底层的方式复现VGG16
一般在网络上看到一些专业人士写的demo,要么看不懂,要么封装得特别好,使得可移植性减弱。
为了巩固自己对经典网络的认识,我觉得用tensorflow以及tf.nn集成的库对经典网络进行复现,VGG作为最经典网络之一成为我的首要选择。
这是论文中的结构示意图,绿框中的是最后的网络结构,论文中提及,每一个卷积后隐层后加上relu激活函数。每次步幅为1,卷积后大小不变。
这里优化函数什么的我就按现在最先进的定义,tensorflow的函数不使用slim或keras。
附上demo:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 19 2019
@author: Ruoyu Chen
The VGG16 networks
"""
import tensorflow as tf
import numpy as np
import os
# REGULARIZER = 0.01
BATCH_SIZE = 10
def weight_variable(shape, name=None):
    """Create a trainable weight tensor initialized from a truncated normal.

    Args:
        shape: list of ints, the shape of the variable.
        name: optional name for the variable.

    Returns:
        A tf.Variable of the given shape (stddev 0.1 initialization).
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    # BUG FIX: the original passed `name` positionally, which bound it to
    # tf.Variable's second parameter (`trainable`), so the variable was
    # never actually named.
    return tf.Variable(initial, name=name)
def bias_variable(shape, name=None):
    """Create a trainable bias tensor initialized to the constant 0.1.

    Args:
        shape: list of ints, the shape of the bias (usually [out_channels]).
        name: optional name for the variable.

    Returns:
        A tf.Variable of the given shape.
    """
    initial = tf.constant(0.1, shape=shape)
    # BUG FIX: pass `name` as a keyword; positionally it would be taken as
    # tf.Variable's `trainable` argument.
    return tf.Variable(initial, name=name)
def conv2d(input, filter, name=None):
    """Convolve `input` with `filter` at stride 1 and zero ("SAME") padding.

    Args:
        input: 4-D float tensor in NHWC layout.
        filter: 4-D tensor [filter_height, filter_width, in_channels,
            out_channels]; must have the same dtype as `input`.
        name: optional name for the op.

    Returns:
        The convolved tensor; SAME padding keeps the spatial size unchanged.
    """
    # Stride 1 along every dimension (batch, height, width, channel);
    # strides[0] and strides[3] must always be 1.
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(input, filter, strides=unit_stride,
                        padding="SAME", name=name)
def max_pool_2x2(input, name):
    """Halve the spatial resolution with a 2x2 max pool (stride 2, SAME padding)."""
    window = [1, 2, 2, 1]  # 2x2 pooling window, moved 2 pixels at a time
    return tf.nn.max_pool(input, ksize=window, strides=window,
                          padding="SAME", name=name)
def VGG16(input, keep_prob):
    """Forward pass of VGG16 (configuration D of Simonyan & Zisserman).

    Args:
        input: float tensor of shape (batch, 224, 224, 3).
        keep_prob: scalar float tensor, dropout keep probability for the
            fully-connected layers (feed 1.0 at evaluation time).

    Returns:
        Softmax class probabilities of shape (batch, 1000).
    """
    def _conv_relu(x, kernel_shape, idx):
        # One conv + bias + ReLU unit; SAME padding keeps the spatial size.
        kernel = weight_variable(kernel_shape, name='kernel_%d' % idx)
        bias = bias_variable([kernel_shape[3]], name='bias_%d' % idx)
        conv = conv2d(x, kernel, name='conv_layer_%d' % idx) + bias
        return tf.nn.relu(conv, name='layer_%d' % idx)

    # Block 1: (224,224,3) -> (224,224,64) -> pool -> (112,112,64)
    net = _conv_relu(input, [3, 3, 3, 64], 1)
    net = _conv_relu(net, [3, 3, 64, 64], 2)
    net = max_pool_2x2(net, name='maxpool_1')
    # Block 2: -> (112,112,128) -> pool -> (56,56,128)
    net = _conv_relu(net, [3, 3, 64, 128], 3)
    net = _conv_relu(net, [3, 3, 128, 128], 4)
    net = max_pool_2x2(net, name='maxpool_2')
    # Block 3: -> (56,56,256) -> pool -> (28,28,256)
    # FIX: the third conv of blocks 3-5 used a 1x1 kernel, which is VGG
    # configuration C; standard VGG16 (configuration D) is 3x3 throughout.
    net = _conv_relu(net, [3, 3, 128, 256], 5)
    net = _conv_relu(net, [3, 3, 256, 256], 6)
    net = _conv_relu(net, [3, 3, 256, 256], 7)
    net = max_pool_2x2(net, name='maxpool_3')
    # Block 4: -> (28,28,512) -> pool -> (14,14,512)
    net = _conv_relu(net, [3, 3, 256, 512], 8)
    net = _conv_relu(net, [3, 3, 512, 512], 9)
    net = _conv_relu(net, [3, 3, 512, 512], 10)
    net = max_pool_2x2(net, name='maxpool_4')
    # Block 5: -> (14,14,512) -> pool -> (7,7,512)
    # FIX: the original named both of the last two pools 'maxpool_10'.
    net = _conv_relu(net, [3, 3, 512, 512], 11)
    net = _conv_relu(net, [3, 3, 512, 512], 12)
    net = _conv_relu(net, [3, 3, 512, 512], 13)
    net = max_pool_2x2(net, name='maxpool_5')

    # Classifier head: flatten 7*7*512 = 25088 features.
    line = tf.reshape(net, [-1, 25088])
    # FIX: the original gave all three FC weight variables the name 'fc_14'.
    fc_14 = weight_variable([25088, 4096], name='fc_14')
    fc_15 = weight_variable([4096, 4096], name='fc_15')
    fc_16 = weight_variable([4096, 1000], name='fc_16')
    bias_14 = bias_variable([4096], name='bias_14')
    bias_15 = bias_variable([4096], name='bias_15')
    bias_16 = bias_variable([1000], name='bias_16')
    layer_14 = tf.nn.relu(
        tf.matmul(line, fc_14, name='matmul_layer_14') + bias_14,
        name='layer_14')
    layer_14 = tf.nn.dropout(layer_14, keep_prob, name='layer_14_dropout')
    layer_15 = tf.nn.relu(
        tf.matmul(layer_14, fc_15, name='matmul_layer_15') + bias_15,
        name='layer_15')
    layer_15 = tf.nn.dropout(layer_15, keep_prob, name='layer_15_dropout')
    # FIX: no ReLU on the final layer -- a ReLU before softmax zeroes every
    # negative logit and distorts the output distribution.
    logits = tf.matmul(layer_15, fc_16, name='matmul_layer_16') + bias_16
    return tf.nn.softmax(logits, name='output')
def backward(datasets, label, test_data, test_label):
    """Build the training graph for VGG16 and run the training loop.

    Args:
        datasets: training images, shape (N, 224, 224, 3).
        label: training labels fed into the `Y_` placeholder.
        test_data: validation images.
        test_label: validation labels.

    Side effects: appends loss lines to ./text/loss.txt and periodically
    saves checkpoints under ./checkpoint/.
    """
    X = tf.placeholder(tf.float32, [None, 224, 224, 3], name="Input")
    # NOTE(review): shape [None, 1] only matches the 1000-way softmax output
    # if the caller supplies one-hot labels of width 1000 -- confirm against
    # the data loader.
    Y_ = tf.placeholder(tf.float32, [None, 1], name='Estimation')
    LEARNING_RATE_BASE = 0.00001   # initial learning rate
    LEARNING_RATE_DECAY = 0.99     # decay factor
    LEARNING_RATE_STEP = 1000      # batches between decays (~samples/BATCH_SIZE)
    # FIX: the original created two step counters ('gloabl_steps' and
    # 'global_step') and never advanced either one.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP,
        LEARNING_RATE_DECAY, staircase=True)
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    # FIX: VGG16 requires keep_prob; the original called VGG16(X), which
    # raises a TypeError.
    y = VGG16(X, keep_prob)
    # Cross-entropy loss; clip y so tf.log never sees 0.
    loss_mse = tf.reduce_mean(
        -tf.reduce_sum(Y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)),
                       reduction_indices=[1]))
    # FIX: actually use the decayed learning rate and advance global_step;
    # the original hard-coded 0.001 and left the schedule dead code.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_mse, global_step=global_step)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        STEPS = 500001
        min_loss = 1
        for i in range(STEPS):
            # 862 is presumably the training-set size -- TODO confirm.
            start = (i * BATCH_SIZE) % 862
            end = start + BATCH_SIZE
            # FIX: keep_prob must be fed (it is a placeholder); use dropout
            # 0.5 during training as in the VGG paper.
            sess.run(train_step,
                     feed_dict={X: datasets[start:end],
                                Y_: label[start:end],
                                keep_prob: 0.5})
            if i % 100 == 0:
                train_loss = sess.run(
                    loss_mse, feed_dict={X: datasets, Y_: label, keep_prob: 1})
                total_loss = sess.run(
                    loss_mse,
                    feed_dict={X: test_data, Y_: test_label, keep_prob: 1})
                if total_loss < min_loss:
                    min_loss = total_loss
                msg = ("After %d training step(s), loss_mse on train data is %g, "
                       "loss_mse on val data is %g, min_loss is %g"
                       % (i, train_loss, total_loss, min_loss))
                # FIX: open the log with a context manager so it is closed
                # even if the write raises.
                with open('./text/loss.txt', 'a') as f:
                    f.write(msg + "\n")
                print(msg)
            if i % 10000 == 0:
                saver.save(sess, './checkpoint/variable', global_step=i)
def main():
    """Load the dataset and launch training."""
    # Sequential_disruption() is expected to return shuffled training and
    # validation data/labels; it is not defined in this file.
    train_x, train_y, val_x, val_y = Sequential_disruption()
    backward(train_x, train_y, val_x, val_y)
# Run training only when this file is executed as a script.
if __name__ == '__main__':
    main()
demo可能不是特别正规,为的仅是了解网络结构以及理解。
这里最开始是导入一些必要的库,但是numpy和os我并没有用到。
然后定义了三个函数:weight_variable,bias_variable和conv2d,为了减小下面demo的长度,都是tf.nn的函数。
然后在VGG16里面,我设置了网络结构,只是向前传播的,每一次卷积都有数字标号,代表第几层。
最后的backward就是反向传播,很套路的方法,设置好输入的占位符,衰减学习率,优化器,损失函数。训练时候每多少步显示一次进展,每多少步保存一下。
在最后的main函数里面第一句:datasets, label, test_data, test_label = Sequential_disruption(),这里Sequential_disruption函数没有的,这句话意思是获取训练的数据和标签,以及训练中val的数据与标签。
然后就能够使用该demo训练了。
这个demo我没有实战使用过,但是在jupyter notebook上测试过其网络的正确性。
这样较为底层的demo方便个人的使用,在以后可以基于该网络做出快速的修改。
由于本人时间原因,暂时不做太多的解释工作,如果库不懂什么意思可以查看官方文档,如果有疑惑或异议欢迎留言。
以上是 tensorflow使用较为底层的方式复现VGG16 的全部内容, 来源链接: utcz.com/a/53986.html