一、构建路线
个人感觉对于任何一个深度学习库,如mxnet、tensorflow、theano、caffe等,基本上
都采用同样的一个学习流程,大体流程如下:
(1)训练阶段:数据打包 -》网络构建、训练 -》模型保存 -》可视化查看损失函数、验证精度
(2)测试阶段:模型加载 -》测试图片读取 -》预测显示结果
(3)移植阶段:量化、压缩加速 -》微调 -》C++移植打包 -》上线
以tensorflow为例子,讲解整个流程的大体架构,完成一个深度学习项目所需要熟悉
的过程代码。
二、训练、测试阶段
1、tensorflow打包数据
这一步对于tensorflow来说,也可以直接自己在线读取:.jpg图片、标签文件等,然后通过
placeholder变量,把数据送入网络中,进行计算。
不过这种效率比较低,对于大规模训练数据来说,我们需要一个比较高效的方式,tensorflow
建议我们采用tfrecords进行高效数据读取。学习tensorflow一定要学会tfrecords文件写
入、读取,具体示例代码如下:
# coding=utf-8
# tensorflow高效数据读取训练
import tensorflow as tf
import cv2
# 把train.txt文件格式,每一行:图片路径名 类别标签
# 将数据打包,转换成tfrecords格式,以便后续高效读取
def _int64_feature(value):
    """Wrap a single integer in a tf.train.Feature."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def encode_to_tfrecords(lable_file, data_root, new_name='data.tfrecords',
                        resize=None):
    """Pack a labelled image list into one TFRecord file.

    Every non-empty line of ``lable_file`` has the form
    ``<image path relative to data_root> <integer label>``.  Each image is
    read with OpenCV, optionally resized, and stored as one
    ``tf.train.Example`` holding its height, width, channel count, raw
    pixel bytes and label.

    Args:
        lable_file: Path of the text file listing images and labels.
        data_root: Directory the image paths are relative to; the record
            file is written into this directory as well.
        new_name: File name of the TFRecord file created under ``data_root``.
        resize: Optional target size handed to ``cv2.resize``; ``None``
            keeps every image at its original size.

    Raises:
        ValueError: If a non-empty line does not contain a path and a label.
        IOError: If a listed image cannot be read.
    """
    writer = tf.python_io.TFRecordWriter(data_root + '/' + new_name)
    num_example = 0
    try:
        with open(lable_file, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines such as a trailing newline.
                    continue
                if len(fields) < 2:
                    raise ValueError(
                        "expected '<image path> <label>', got: %r" % line)
                image_path = data_root + "/" + fields[0]
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread reports failure by returning None; fail
                    # here with the offending path instead of crashing
                    # later on image.shape.
                    raise IOError("cannot read image: %s" % image_path)
                if resize is not None:
                    image = cv2.resize(image, resize)
                height, width, nchannel = image.shape
                label = int(fields[1])
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': _int64_feature(height),
                    'width': _int64_feature(width),
                    'nchannel': _int64_feature(nchannel),
                    'image': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                    'label': _int64_feature(label),
                }))
                writer.write(example.SerializeToString())
                num_example += 1
    finally:
        # Always release the record file, even when an image is bad.
        writer.close()
    print("%s 样本数据量: %s" % (lable_file, num_example))
# 读取tfrecords文件
def decode_from_tfrecords(filename, num_epoch=None):
    """Build the graph ops that read one (image, label) pair from a TFRecord file.

    Args:
        filename: Path of the TFRecord file to read.
        num_epoch: Passed to ``string_input_producer`` as ``num_epochs``.

    Returns:
        A tuple ``(image, label)``: ``image`` is the uint8 pixel tensor
        reshaped to the stored (height, width, nchannel); ``label`` is the
        stored label cast to int32.
    """
    # The producer takes a list of files because large data sets are often
    # split across several record files; here there is just one.
    file_queue = tf.train.string_input_producer([filename],
                                                num_epochs=num_epoch)
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(file_queue)

    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    label = tf.cast(parsed['label'], tf.int32)
    flat_pixels = tf.decode_raw(parsed['image'], tf.uint8)
    # Restore the image layout from the dimensions stored with the record.
    dims = [tf.cast(parsed[key], tf.int32)
            for key in ('height', 'width', 'nchannel')]
    image = tf.reshape(flat_pixels, tf.pack(dims))
    return image, label
# 根据队列流数据格式,解压出一张图片后,输入一张图片,对其做预处理、及样本随机扩充
def get_batch(image, label, batch_size, crop_size):
    """Augment a single decoded example and assemble shuffled training batches.

    Args:
        image: Image tensor for one example.
        label: Label tensor for the same example.
        batch_size: Number of examples per batch.
        crop_size: Side length of the square random crop (3 channels).

    Returns:
        A tuple ``(images, labels)`` where ``labels`` is reshaped to
        ``[batch_size]``.
    """
    # Augmentation: random crop followed by a random up/down flip.
    # (Random brightness and contrast changes were left disabled.)
    crop_shape = [crop_size, crop_size, 3]
    augmented = tf.image.random_flip_up_down(tf.random_crop(image, crop_shape))

    # capacity defines the range that is shuffled; to shuffle across the
    # whole training set it has to be large enough.
    images, label_batch = tf.train.shuffle_batch(
        [augmented, label],
        batch_size=batch_size,
        num_threads=16,
        capacity=50000,
        min_after_dequeue=10000)
    return images, tf.reshape(label_batch, [batch_size])
# 这个是用于测试阶段,使用的get_batch函数
def get_test_batch(image, label, batch_size, crop_size):
    """Assemble unshuffled batches for the test/validation phase.

    Args:
        image: Image tensor for one example.
        label: Label tensor for the same example.
        batch_size: Number of examples per batch.
        crop_size: Side length of the square crop (3 channels).

    Returns:
        A tuple ``(images, labels)`` where ``labels`` is reshaped to
        ``[batch_size]``.
    """
    # Keep the central 39/45 fraction of the image, then crop it to
    # crop_size x crop_size x 3 with tf.random_crop.
    central = tf.image.central_crop(image, 39. / 45.)
    crop_shape = [crop_size, crop_size, 3]
    cropped = tf.random_crop(central, crop_shape)
    images, label_batch = tf.train.batch([cropped, label],
                                         batch_size=batch_size)
    return images, tf.reshape(label_batch, [batch_size])
# 测试上面的压缩、解压代码
def test():
    """Smoke-test the TFRecord round trip: encode, decode, then draw batches."""
    # resize has to be passed by keyword: the third positional parameter of
    # encode_to_tfrecords is new_name, so a positional (100, 100) would be
    # used as the output file name.
    encode_to_tfrecords("data/train.txt", "data", resize=(100, 100))
    image, label = decode_from_tfrecords('data/data.tfrecords')
    # get_batch requires a crop size; 90 is an illustrative value that fits
    # inside the 100x100 images written above.
    batch_image, batch_label = get_batch(image, label, 3, 90)
    init = tf.initialize_all_variables()
    with tf.Session() as session:
        session.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # Every run() call advances the input queue to the next batch.
            for _ in range(100000):
                batch_image_np, batch_label_np = session.run(
                    [batch_image, batch_label])
                print(batch_image_np.shape)
                print(batch_label_np.shape)
        finally:
            # The queue threads must be stopped, also when the loop fails.
            coord.request_stop()
            coord.join(threads)
# test()
2、网络架构与训练
经过上面的数据格式处理,接着我们只要写一写网络结构、网络优化方法,把数据搞进网络中就可以了,
具体示例代码如下:
# coding=utf-8
import os

import cv2
import tensorflow as tf

from data_encoder_decoeder import (encode_to_tfrecords, decode_from_tfrecords,
                                   get_batch, get_test_batch)
class network(object):
    """Three-stage convolutional classifier for 39x39x3 images with 6 outputs.

    Layer shapes (VALID padding, 2x2 max-pooling after every convolution):
        39x39x3  -> conv 4x4 -> 36x36x20 -> pool -> 18x18x20
        18x18x20 -> conv 3x3 -> 16x16x40 -> pool -> 8x8x40
        8x8x40   -> conv 3x3 -> 6x6x60   -> pool -> 3x3x60
        3*3*60   -> fc1 -> 120 -> fc2 -> 6
    """

    def __init__(self):
        """Create the weight and bias variables of every layer."""
        # Set by sorfmax_loss(); None until a loss has been built.
        self.cost = None
        with tf.variable_scope("weights"):
            self.weights = {
                # 39*39*3->36*36*20->18*18*20
                'conv1': tf.get_variable(
                    'conv1', [4, 4, 3, 20],
                    initializer=tf.contrib.layers.xavier_initializer_conv2d()),
                # 18*18*20->16*16*40->8*8*40
                'conv2': tf.get_variable(
                    'conv2', [3, 3, 20, 40],
                    initializer=tf.contrib.layers.xavier_initializer_conv2d()),
                # 8*8*40->6*6*60->3*3*60
                'conv3': tf.get_variable(
                    'conv3', [3, 3, 40, 60],
                    initializer=tf.contrib.layers.xavier_initializer_conv2d()),
                # 3*3*60->120
                'fc1': tf.get_variable(
                    'fc1', [3 * 3 * 60, 120],
                    initializer=tf.contrib.layers.xavier_initializer()),
                # 120->6
                'fc2': tf.get_variable(
                    'fc2', [120, 6],
                    initializer=tf.contrib.layers.xavier_initializer()),
            }
        with tf.variable_scope("biases"):
            self.biases = {
                'conv1': tf.get_variable(
                    'conv1', [20, ],
                    initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
                'conv2': tf.get_variable(
                    'conv2', [40, ],
                    initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
                'conv3': tf.get_variable(
                    'conv3', [60, ],
                    initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
                'fc1': tf.get_variable(
                    'fc1', [120, ],
                    initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
                'fc2': tf.get_variable(
                    'fc2', [6, ],
                    initializer=tf.constant_initializer(value=0.0, dtype=tf.float32)),
            }

    def _conv_stage(self, inputs, name):
        """Apply convolution + bias, ReLU and 2x2 max-pooling for layer ``name``."""
        conv = tf.nn.bias_add(
            tf.nn.conv2d(inputs, self.weights[name],
                         strides=[1, 1, 1, 1], padding='VALID'),
            self.biases[name])
        relu = tf.nn.relu(conv)
        return tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='VALID')

    def _features(self, images):
        """Normalize the input, run the three conv stages and flatten the result."""
        # [batch, in_height, in_width, in_channels]
        images = tf.reshape(images, shape=[-1, 39, 39, 3])
        # Rescale: for 0..255 pixel values this yields the range [-1, 1].
        images = (tf.cast(images, tf.float32) / 255. - 0.5) * 2
        pooled = images
        for name in ('conv1', 'conv2', 'conv3'):
            pooled = self._conv_stage(pooled, name)
        # Flatten the feature maps to match the input width of fc1.
        return tf.reshape(
            pooled, [-1, self.weights['fc1'].get_shape().as_list()[0]])

    def _classify(self, features):
        """Run the two fully connected layers and return the class logits."""
        fc1 = tf.matmul(features, self.weights['fc1']) + self.biases['fc1']
        fc_relu1 = tf.nn.relu(fc1)
        return tf.matmul(fc_relu1, self.weights['fc2']) + self.biases['fc2']

    def inference(self, images):
        """Training-time forward pass (dropout with keep probability 0.5).

        Args:
            images: Batch of images reshapeable to [-1, 39, 39, 3].

        Returns:
            The logits produced by the last fully connected layer.
        """
        drop1 = tf.nn.dropout(self._features(images), 0.5)
        return self._classify(drop1)

    def inference_test(self, images):
        """Test-time forward pass: same layers and weights, without dropout.

        Args:
            images: Batch of images reshapeable to [-1, 39, 39, 3].

        Returns:
            The logits produced by the last fully connected layer.
        """
        return self._classify(self._features(images))

    def sorfmax_loss(self, predicts, labels):
        """Build the softmax cross-entropy loss and store it in ``self.cost``.

        Args:
            predicts: Logits as returned by ``inference``.
            labels: Integer class labels; they are one-hot encoded here.

        Returns:
            The scalar loss tensor.  As before, the mean is taken over all
            batch * class entries, so the scale of the loss is unchanged.
        """
        num_classes = self.weights['fc2'].get_shape().as_list()[1]
        one_hot = tf.one_hot(labels, num_classes)
        # log_softmax avoids tf.log(softmax(x)), which yields -inf once a
        # probability underflows to 0 and turns the loss into NaN.
        loss = -tf.reduce_mean(one_hot * tf.nn.log_softmax(predicts))
        self.cost = loss
        return self.cost

    def optimer(self, loss, lr=0.001):
        """Return the plain gradient-descent training op for ``loss``.

        Args:
            loss: Scalar loss tensor to minimize.
            lr: Learning rate.
        """
        train_optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
        return train_optimizer
def train():
    """Pack the data, build the network and run the training loop.

    The training loss is printed every 50 steps.  Every 500 steps the
    validation accuracy is printed and a checkpoint is written to
    ``model/model.ckpt``.  If that checkpoint file already exists,
    training resumes from it.
    """
    encode_to_tfrecords("data/train.txt", "data", 'train.tfrecords', (45, 45))
    image, label = decode_from_tfrecords('data/train.tfrecords')
    batch_image, batch_label = get_batch(image, label, batch_size=50, crop_size=39)

    # Training graph.
    net = network()
    inf = net.inference(batch_image)
    loss = net.sorfmax_loss(inf, batch_label)
    opti = net.optimer(loss)

    # Validation graph; it shares the variables held by `net`.
    encode_to_tfrecords("data/val.txt", "data", 'val.tfrecords', (45, 45))
    test_image, test_label = decode_from_tfrecords('data/val.tfrecords', num_epoch=None)
    test_images, test_labels = get_test_batch(test_image, test_label, batch_size=120, crop_size=39)
    test_inf = net.inference_test(test_images)
    correct_prediction = tf.equal(tf.cast(tf.argmax(test_inf, 1), tf.int32), test_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.initialize_all_variables()
    # Build the Saver once: constructing a new one at every checkpoint adds
    # another set of save/restore ops to the graph each time.
    saver = tf.train.Saver(max_to_keep=None)
    checkpoint_dir = "model"
    checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')

    with tf.Session() as session:
        session.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            if os.path.exists(checkpoint_path):
                saver.restore(session, checkpoint_path)
            if not os.path.isdir(checkpoint_dir):
                # Make sure the checkpoint directory exists before saving.
                os.makedirs(checkpoint_dir)
            max_iter = 100000
            for step in range(max_iter):
                # Only the loss is needed on the host; fetching the image
                # batch and logits every step just copies data.
                loss_np, _ = session.run([loss, opti])
                if step % 50 == 0:
                    print('trainloss: %s' % (loss_np,))
                if step % 500 == 0:
                    accuracy_np = session.run([accuracy])
                    print('***************test accruacy: %s *******************'
                          % (accuracy_np,))
                    saver.save(session, checkpoint_path)
        finally:
            # The queue threads must be stopped, also when training fails.
            coord.request_stop()
            coord.join(threads)
# Start training; this call runs as soon as the module is loaded.
train()
3、可视化显示
(1)
首先在源码中加入需要跟踪的变量:
tf.scalar_summary("cost_function", loss)  # 损失函数值
(2)
然后定义执行操作:
merged_summary_op = tf.merge_all_summaries()
(3)
在session中定义保存路径:
summary_writer = tf.train.SummaryWriter('log', session.graph)
(4)
然后在session执行的时候,保存:
# Fetch the merged summaries together with the loss while running the training op.
summary_str, loss_np, _ = session.run([merged_summary_op, loss, opti])
# Record the summaries under the current iteration number.
summary_writer.add_summary(summary_str, iter)
(5)
最后只要训练完毕后,直接在终端输入命令:
python /usr/local/lib/python2.7/dist-packages/tensorflow/tensorboard/tensorboard.py --logdir=log
然后打开浏览器网址:
http://0.0.0.0:6006
即可观察训练曲线。
4、测试阶段
测试阶段主要是直接通过加载图模型、读取参数等,然后直接通过tensorflow的相关函数,进行调用,而不需要网络架构相关的代码;通过内存feed_dict的方式,对相关的输入节点赋予相关的数据,进行前向传导,并获取相关的节点数值。
# coding=utf-8
import tensorflow as tf
import os
import cv2
def load_model(session, netmodel_path, param_path):
    """Restore a saved graph and its parameters, and look up the I/O tensors.

    Args:
        session: Session the parameters are restored into.
        netmodel_path: Path of the saved meta graph.
        param_path: Path of the checkpoint holding the parameter values.

    Returns:
        A tuple ``(x, y, batch_size)``: the first entry of the
        ``test_images``, ``test_inf`` and ``batch_size`` collections.
    """
    saver = tf.train.import_meta_graph(netmodel_path)
    saver.restore(session, param_path)
    # These collections have to be filled at training time, e.g. with
    # tf.add_to_collection('test_images', test_images).
    x, y, batch_size = [tf.get_collection(key)[0]
                        for key in ('test_images', 'test_inf', 'batch_size')]
    return x, y, batch_size
def load_images(data_root):
    """Build ops that read and JPEG-decode whole image files from a queue.

    Args:
        data_root: Passed straight to ``tf.train.string_input_producer``
            (presumably a list of image file names; verify against caller).

    Returns:
        A tuple ``(image, key)``: the decoded image tensor and the key
        returned by the reader.
    """
    queue = tf.train.string_input_producer(data_root)
    key, contents = tf.WholeFileReader().read(queue)
    return tf.image.decode_jpeg(contents), key
def test(data_root="data/race/cropbrown"):
    """Classify every image in ``data_root`` with the saved model and print the result.

    The graph and parameters are loaded from ``model/model_ori_race.ckpt``;
    images are fed through ``feed_dict``, so no network-building code is
    needed here.

    Args:
        data_root: Directory whose files are classified one by one.
    """
    image_filenames = [data_root + '/' + name for name in os.listdir(data_root)]
    race_listsrc = ['black', 'brown', 'white', 'yellow']
    with tf.Session() as session:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            x, y, batch_size = load_model(
                session,
                os.path.join("model", 'model_ori_race.ckpt.meta'),
                os.path.join("model", 'model_ori_race.ckpt'))
            predict_label = tf.cast(tf.argmax(y, 1), tf.int32)
            print(x.get_shape())
            for imgf in image_filenames:
                image = cv2.imread(imgf)
                if image is None:
                    # os.listdir may return files that are not images;
                    # cv2.imread yields None for them, so skip instead of
                    # crashing in cv2.resize.
                    print("skip unreadable image: %s" % imgf)
                    continue
                image = cv2.resize(image, (76, 76)).reshape((1, 76, 76, 3))
                print("cv shape: %s" % (image.shape,))
                y_np = session.run(predict_label, feed_dict={x: image, batch_size: 1})
                # y_np is an array with one entry per fed image; turn that
                # entry into a plain int before indexing the Python list.
                print(race_listsrc[int(y_np[0])])
        finally:
            # The queue threads must be stopped, also when prediction fails.
            coord.request_stop()
            coord.join(threads)
4、移植阶段
(1)
一个算法经过实验阶段后,接着就要进入移植商用,因此接着需要采用tensorflow的C API函数,直接进行预测推理。首先我们先把tensorflow编译成链接库,然后编写cmake,调用tensorflow链接库:
bazel build -c opt //tensorflow:libtensorflow.so
在bazel-bin/tensorflow目录下会生成libtensorflow.so文件
5、C++ API调用、cmake编写:
三、熟悉常用API
1、LSTM使用
# Sketch of stepping an LSTM cell over a sequence.  lstm_size, batch_size,
# words_in_dataset, softmax_w, softmax_b, loss_function and target_words are
# not defined in this snippet and must be supplied by the surrounding code.
import tensorflow as tf

# Create one LSTM cell whose hidden layer has lstm_size units.
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
# Initial hidden state for the sequence.
state = tf.zeros([batch_size, lstm.state_size])
loss = 0.0
for current_batch_of_words in words_in_dataset:
    # The cell returns the hidden-layer output and the updated state.
    output, state = lstm(current_batch_of_words, state)
    logits = tf.matmul(output, softmax_w) + softmax_b  # matrix multiplication
    probabilities = tf.nn.softmax(logits)  # softmax output
    loss += loss_function(probabilities, target_words)
1、one_hot函数:
# one_hot converts integer training labels directly into one-hot vectors,
# e.g. for use in a cross-entropy loss.
import tensorflow as tf

a = tf.convert_to_tensor([[1], [2], [4]])
b = tf.one_hot(a, 5)
>> b的值为:
[[[0. 1. 0. 0. 0.]]
 [[0. 0. 1. 0. 0.]]
 [[0. 0. 0. 0. 1.]]]
2、assign_sub
import tensorflow as tf

x = tf.Variable(10, name="x")
# Evaluating the op returned by x.assign_sub also changes the value of x.
sub = x.assign_sub(3)
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init_op)
    print(sub.eval())
    print(x.eval())
可以看到输出 sub = x = 7
采用state_ops的assign_sub也是同样sub = x = 7
也就是说assign函数返回结果值的同时,变量本身的值也会被改变
3、变量查看
# Print the name of every variable returned by tf.all_variables().
for variable in tf.all_variables():
    print(variable.name)
4、slice函数:
import cv2
import tensorflow as tf

# tf.slice can cut a rectangular sub-image out of an image tensor.  The
# tensor is indexed (row, column, channel), so begin is (miny, minx, 0) and
# size is (height, width, -1), mirroring the NumPy crop used for comparison
# below.
minx = 20
miny = 30
height = 100
width = 200
image = tf.placeholder(dtype=tf.uint8, shape=(386, 386, 3))
rect_image = tf.slice(image, (miny, minx, 0), (height, width, -1))
cvimage = cv2.imread("1.jpg")
if cvimage is None:
    # cv2.imread returns None when the file cannot be read.
    raise IOError("cannot read image: 1.jpg")
# Reference crop taken directly with array slicing.
cv2.imshow("cv2", cvimage[miny:(miny + height), minx:(minx + width), :])
with tf.Session() as sess:
    tfimage = sess.run([rect_image], {image: cvimage})
    cv2.imshow('tf', tfimage[0])
    cv2.waitKey()
5、正态分布随机初始化
tf.truncated_normal
6、打印操作运算所在的硬件设备信息
tf.ConfigProto(log_device_placement=True)
7、变量域名的reuse:
import tensorflow as tf

# Reuse is not enabled here: get_variable creates 'foo/v'.
# NOTE(review): the original note said an already-created variable would be
# returned as a shared variable in this mode; TensorFlow's variable-scope
# documentation describes that case as an error unless reuse is set —
# confirm.
with tf.variable_scope('foo'):
    v = tf.get_variable('v', [1])

# With reuse=True, get_variable must find an existing variable.  'v1' was
# never created, so this lookup is expected to fail; the snippet
# demonstrates that error.
with tf.variable_scope('foo', reuse=True):
    v1 = tf.get_variable('v1', [1])
8、allow_soft_placement的使用:allow_soft_placement = True,允许当在代码中指定tf.device设备,如果设备找不到,那么就采用默认的设备。如果该参数设置为false,当设备找不到的时候,会直接编译不通过。
9、batch normalize调用:
# Batch-normalization call through tf.contrib.layers with decay 0.9 and
# scaling enabled.  self.epsilon and self.name belong to an enclosing class
# that is not shown here.
tf.contrib.layers.batch_norm(x, decay=0.9, updates_collections=None,
epsilon=self.epsilon, scale=True, scope=self.name)