tensorflow项目学习路径

2017 年 11 月 23 日 北京思腾合力科技有限公司

一、构建路线
个人感觉对于任何一个深度学习库，如mxnet、tensorflow、theano、caffe等，基本上
都采用同样的一个学习流程，大体流程如下：
(1)训练阶段：数据打包 -》网络构建、训练 -》模型保存 -》可视化查看损失函数、验证精度
(2)测试阶段：模型加载 -》测试图片读取 -》预测显示结果
(3)移植阶段：量化、压缩加速 -》微调 -》C++移植打包 -》上线

以tensorflow为例子，讲解整个流程的大体架构，完成一个深度学习项目所需要熟悉
的过程代码。


二、训练、测试阶段
1、tensorflow打包数据
这一步对于tensorflow来说，也可以直接自己在线读取：.jpg图片、标签文件等，然后通过
placeholder变量，把数据送入网络中，进行计算。
不过这种效率比较低，对于大规模训练数据来说，我们需要一个比较高效的方式，tensorflow
建议我们采用tfrecords进行高效数据读取。学习tensorflow一定要学会tfrecords文件写
入、读取，具体示例代码如下：

# coding=utf-8  
# tensorflow高效数据读取训练  
import tensorflow as tf
import cv2


# 把train.txt文件格式，每一行：图片路径名   类别标签  

# 将数据打包，转换成tfrecords格式，以便后续高效读取  
def _int64_feature(value):
    """Wrap a single integer in a tf.train.Feature holding an Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def encode_to_tfrecords(lable_file, data_root, new_name='data.tfrecords',
                        resize=None):
    """Pack the images listed in an index file into one TFRecord file.

    Every non-blank line of ``lable_file`` is split on whitespace: the first
    field is an image path relative to ``data_root`` and the second field is
    an integer class label. Each image is stored as raw bytes together with
    its height, width, channel count and label.

    Args:
        lable_file: Path of the text index file.
        data_root: Directory the image paths are relative to. The TFRecord
            file is written into this directory as well.
        new_name: File name of the TFRecord file created inside ``data_root``.
        resize: Optional size handed to ``cv2.resize`` as-is; ``None`` keeps
            each image at its original size.

    Raises:
        IOError: If ``cv2.imread`` cannot load one of the listed images.
    """
    writer = tf.python_io.TFRecordWriter(data_root + '/' + new_name)
    num_example = 0
    try:
        with open(lable_file, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no sample.
                    continue

                image_path = data_root + "/" + fields[0]
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals failure by returning None instead of
                    # raising, so fail here with the offending path.
                    raise IOError("cannot read image: " + image_path)
                if resize is not None:
                    image = cv2.resize(image, resize)
                height, width, nchannel = image.shape

                label = int(fields[1])

                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': _int64_feature(height),
                    'width': _int64_feature(width),
                    'nchannel': _int64_feature(nchannel),
                    'image': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                    'label': _int64_feature(label),
                }))
                writer.write(example.SerializeToString())
                num_example += 1
    finally:
        # Close the writer even when an image fails to load.
        writer.close()
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("%s 样本数据量： %d" % (lable_file, num_example))


# 读取tfrecords文件  
def decode_from_tfrecords(filename, num_epoch=None):
    """Build graph ops that read one (image, label) sample from a TFRecord file.

    Args:
        filename: Path of the TFRecord file to read.
        num_epoch: Forwarded to ``tf.train.string_input_producer`` as
            ``num_epochs``.

    Returns:
        A tuple ``(image, label)``: ``image`` is the uint8 tensor reshaped to
        the stored (height, width, nchannel) and ``label`` is the stored
        label cast to int32.
    """
    # The producer takes a list of files because large datasets are often
    # split across several record files.
    file_queue = tf.train.string_input_producer([filename],
                                                num_epochs=num_epoch)
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(file_queue)

    schema = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    parsed = tf.parse_single_example(serialized, features=schema)

    label = tf.cast(parsed['label'], tf.int32)
    pixels = tf.decode_raw(parsed['image'], tf.uint8)
    # Rebuild the image shape from the three stored dimensions.
    dims = [tf.cast(parsed[key], tf.int32)
            for key in ('height', 'width', 'nchannel')]
    image = tf.reshape(pixels, tf.pack(dims))
    return image, label


# 根据队列流数据格式，解压出一张图片后，输入一张图片，对其做预处理、及样本随机扩充  
def get_batch(image, label, batch_size, crop_size, num_threads=16,
              capacity=50000, min_after_dequeue=10000):
    """Augment a single decoded sample and assemble shuffled training batches.

    Args:
        image: Image tensor for one sample (as produced by the record reader).
        label: Label tensor for the same sample.
        batch_size: Number of samples per batch.
        crop_size: Side length of the random square crop. The crop is taken
            with 3 channels.
        num_threads: Number of enqueueing threads for the shuffle queue.
        capacity: Capacity of the shuffle queue. It bounds how far samples
            can be shuffled, so use a large value when batches should be
            drawn from the whole training set.
        min_after_dequeue: Minimum number of samples kept in the queue after
            a dequeue, so that samples stay well mixed.

    Returns:
        A tuple ``(images, labels)`` where ``labels`` is reshaped to
        ``[batch_size]``.
    """
    # Data augmentation: random crop followed by a random up/down flip.
    distorted_image = tf.random_crop(image, [crop_size, crop_size, 3])
    distorted_image = tf.image.random_flip_up_down(distorted_image)

    # Further augmentations that were left disabled:
    #   tf.image.random_brightness(distorted_image, max_delta=63)
    #   tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

    images, label_batch = tf.train.shuffle_batch(
        [distorted_image, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    # Unshuffled alternative:
    #   tf.train.batch([distorted_image, label], batch_size=batch_size)

    # For debugging the batches can be logged with:
    #   tf.image_summary('images', images)
    return images, tf.reshape(label_batch, [batch_size])


# 这个是用于测试阶段，使用的get_batch函数  
def get_test_batch(image, label, batch_size, crop_size):
    """Assemble unshuffled batches for the test/validation phase.

    The image is first centre-cropped to the fraction 39/45 and then cropped
    to ``crop_size`` x ``crop_size`` x 3 with ``tf.random_crop``.

    Args:
        image: Image tensor for one sample.
        label: Label tensor for the same sample.
        batch_size: Number of samples per batch.
        crop_size: Side length of the final square crop.

    Returns:
        A tuple ``(images, labels)`` where ``labels`` is reshaped to
        ``[batch_size]``.
    """
    # NOTE(review): the 39/45 fraction is fixed while crop_size is a
    # parameter; presumably callers use 45-pixel inputs with crop_size=39.
    centred = tf.image.central_crop(image, 39. / 45.)
    patch = tf.random_crop(centred, [crop_size, crop_size, 3])
    batched = tf.train.batch([patch, label], batch_size=batch_size)
    images, label_batch = batched
    return images, tf.reshape(label_batch, [batch_size])


# 测试上面的压缩、解压代码  
def test():
    """Smoke-test the encode -> decode -> batch pipeline.

    Packs ``data/train.txt`` into ``data/data.tfrecords`` with images resized
    to 100x100, then repeatedly pulls batches of 3 and prints their shapes.
    """
    # resize must be passed by keyword: the third positional parameter of
    # encode_to_tfrecords is new_name, not resize.
    encode_to_tfrecords("data/train.txt", "data", resize=(100, 100))
    image, label = decode_from_tfrecords('data/data.tfrecords')
    # crop_size is required by get_batch and must not exceed the 100-pixel
    # images produced by the resize above.
    batch_image, batch_label = get_batch(image, label, 3, 100)
    init = tf.initialize_all_variables()
    with tf.Session() as session:
        session.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # Every run() call advances the input queue to the next batch.
            for _ in range(100000):
                # To inspect single samples instead of batches, fetch
                # [image, label] here and show them with cv2.imshow.
                batch_image_np, batch_label_np = session.run(
                    [batch_image, batch_label])
                print(batch_image_np.shape)
                print(batch_label_np.shape)
        finally:
            # The queue threads must be stopped and joined, otherwise the
            # session reports errors on shutdown.
            coord.request_stop()
            coord.join(threads)


# Uncomment to run the smoke test:
# test()


2、网络架构与训练
经过上面的数据格式处理，接着我们只要写一写网络结构、网络优化方法，把数据搞进网络中就可以了，
具体示例代码如下：

# coding=utf-8  
import os

import cv2
import tensorflow as tf

from data_encoder_decoeder import (
    decode_from_tfrecords,
    encode_to_tfrecords,
    get_batch,
    get_test_batch,
)


class network(object):
    """Three conv/pool stages followed by two fully connected layers.

    The input is reshaped to 39x39x3 and the last layer has 6 outputs.
    Variables are created under the ``weights`` and ``biases`` variable
    scopes with one entry per layer name.
    """

    # (layer name, weight shape). The bias of each layer has as many entries
    # as the last weight dimension.
    _LAYER_SHAPES = (
        ('conv1', [4, 4, 3, 20]),   # 39*39*3 -> 36*36*20 -> 18*18*20
        ('conv2', [3, 3, 20, 40]),  # 18*18*20 -> 16*16*40 -> 8*8*40
        ('conv3', [3, 3, 40, 60]),  # 8*8*40 -> 6*6*60 -> 3*3*60
        ('fc1', [3 * 3 * 60, 120]),  # 3*3*60 -> 120
        ('fc2', [120, 6]),          # 120 -> 6
    )

    def __init__(self):
        """Create all weight and bias variables of the network."""
        with tf.variable_scope("weights"):
            self.weights = {}
            for name, shape in self._LAYER_SHAPES:
                if name.startswith('conv'):
                    initializer = tf.contrib.layers.xavier_initializer_conv2d()
                else:
                    initializer = tf.contrib.layers.xavier_initializer()
                self.weights[name] = tf.get_variable(
                    name, shape, initializer=initializer)
        with tf.variable_scope("biases"):
            self.biases = {}
            for name, shape in self._LAYER_SHAPES:
                self.biases[name] = tf.get_variable(
                    name, [shape[-1]],
                    initializer=tf.constant_initializer(value=0.0,
                                                        dtype=tf.float32))

    def _conv_block(self, inputs, name):
        """Apply conv (stride 1, VALID) + bias, ReLU and 2x2 max-pooling."""
        conv = tf.nn.bias_add(
            tf.nn.conv2d(inputs, self.weights[name], strides=[1, 1, 1, 1],
                         padding='VALID'),
            self.biases[name])
        return tf.nn.max_pool(tf.nn.relu(conv), ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='VALID')

    def _forward(self, images, keep_prob=None):
        """Shared forward pass; dropout is applied only if keep_prob is set."""
        # [batch, in_height, in_width, in_channels]
        images = tf.reshape(images, shape=[-1, 39, 39, 3])
        # Map pixel values from [0, 255] to [-1, 1].
        images = (tf.cast(images, tf.float32) / 255. - 0.5) * 2

        features = images
        for name in ('conv1', 'conv2', 'conv3'):
            features = self._conv_block(features, name)

        # Flatten the feature maps to vectors matching the fc1 input size.
        flatten = tf.reshape(
            features, [-1, self.weights['fc1'].get_shape().as_list()[0]])
        if keep_prob is not None:
            flatten = tf.nn.dropout(flatten, keep_prob)

        fc1 = tf.matmul(flatten, self.weights['fc1']) + self.biases['fc1']
        fc_relu1 = tf.nn.relu(fc1)
        fc2 = tf.matmul(fc_relu1, self.weights['fc2']) + self.biases['fc2']
        return fc2

    def inference(self, images):
        """Return the logits for training (dropout with keep_prob 0.5)."""
        return self._forward(images, keep_prob=0.5)

    def inference_test(self, images):
        """Return the logits for evaluation (no dropout)."""
        return self._forward(images)

    def sorfmax_loss(self, predicts, labels):
        """Compute the softmax cross-entropy loss and store it in self.cost.

        Args:
            predicts: Logits as returned by ``inference``.
            labels: Integer class labels.

        Returns:
            The scalar loss tensor. The mean is taken over every element of
            the (batch, classes) product, as the original formula did.
        """
        num_classes = self.weights['fc2'].get_shape().as_list()[1]
        one_hot_labels = tf.one_hot(labels, num_classes)
        # log_softmax avoids the log(0) -> -inf (and 0 * -inf -> NaN) that
        # tf.log(tf.nn.softmax(...)) produces once a probability underflows.
        loss = -tf.reduce_mean(one_hot_labels * tf.nn.log_softmax(predicts))
        self.cost = loss
        return self.cost

    def optimer(self, loss, lr=0.001):
        """Return a plain gradient-descent training op minimising ``loss``."""
        train_optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)

        return train_optimizer


def train():
    """Train the network on data/train.txt and validate on data/val.txt.

    Both index files are packed into TFRecord files with 45x45 images.
    Training uses shuffled batches of 50 with random 39x39 crops, and
    validation uses batches of 120. The training loss is printed every 50
    steps. Every 500 steps the validation accuracy is printed and a
    checkpoint is written to ``model/model.ckpt``. An existing checkpoint at
    that path is restored before training starts.
    """
    encode_to_tfrecords("data/train.txt", "data", 'train.tfrecords', (45, 45))
    image, label = decode_from_tfrecords('data/train.tfrecords')
    batch_image, batch_label = get_batch(image, label, batch_size=50,
                                         crop_size=39)

    # Training graph.
    net = network()
    inf = net.inference(batch_image)
    loss = net.sorfmax_loss(inf, batch_label)
    opti = net.optimer(loss)

    # Validation graph.
    encode_to_tfrecords("data/val.txt", "data", 'val.tfrecords', (45, 45))
    test_image, test_label = decode_from_tfrecords('data/val.tfrecords',
                                                   num_epoch=None)
    test_images, test_labels = get_test_batch(test_image, test_label,
                                              batch_size=120, crop_size=39)
    test_inf = net.inference_test(test_images)
    correct_prediction = tf.equal(tf.cast(tf.argmax(test_inf, 1), tf.int32),
                                  test_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.initialize_all_variables()
    # Build the Saver once: constructing one per save adds a fresh set of
    # save/restore ops to the graph every time.
    saver = tf.train.Saver(max_to_keep=None)
    checkpoint_path = os.path.join("model", 'model.ckpt')
    if not os.path.isdir("model"):
        # The checkpoint directory must exist before the first save.
        os.makedirs("model")

    with tf.Session() as session:
        session.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # Depending on the checkpoint format the data lives either in the
            # file itself or next to a "<path>.index" file.
            if (os.path.exists(checkpoint_path)
                    or os.path.exists(checkpoint_path + '.index')):
                saver.restore(session, checkpoint_path)

            max_iter = 100000
            for step in range(max_iter):
                loss_np, _ = session.run([loss, opti])
                if step % 50 == 0:
                    print('trainloss: %s' % loss_np)
                if step % 500 == 0:
                    accuracy_np = session.run([accuracy])
                    print('***************test accruacy: %s '
                          '*******************' % accuracy_np)
                    saver.save(session, checkpoint_path)
        finally:
            # The queue threads must be stopped and joined, otherwise the
            # session reports errors on shutdown.
            coord.request_stop()
            coord.join(threads)


if __name__ == "__main__":
    train()

3、可视化显示
(1) 首先在源码中加入需要跟踪的变量：

tf.scalar_summary("cost_function", loss)  # 损失函数值
(2) 然后定义执行操作：

merged_summary_op = tf.merge_all_summaries()
(3) 在session中定义保存路径：

summary_writer = tf.train.SummaryWriter('log', session.graph)
(4) 然后在session执行的时候，保存：

summary_str, loss_np, _ = session.run([merged_summary_op, loss, opti])
summary_writer.add_summary(summary_str, iter)

(5) 最后只要训练完毕后，直接在终端输入命令：

python /usr/local/lib/python2.7/dist-packages/tensorflow/tensorboard/tensorboard.py --logdir=log
然后打开浏览器网址：

http://0.0.0.0:6006

即可观察训练曲线。

4、测试阶段
测试阶段主要是直接通过加载图模型、读取参数等，然后直接通过tensorflow的相关函数，进行调用，而不需要网络架构相关的代码；通过内存feed_dict的方式，对相关的输入节点赋予相关的数据，进行前向传导，并获取相关的节点数值。

# coding=utf-8  
import tensorflow  as tf
import os
import cv2


def load_model(session, netmodel_path, param_path):
    """Restore a saved graph and its parameters, then look up the I/O tensors.

    Args:
        session: Session the parameters are restored into.
        netmodel_path: Path of the meta-graph file to import.
        param_path: Path of the checkpoint holding the parameter values.

    Returns:
        A tuple ``(x, y, batch_size)`` holding the first entry of the graph
        collections ``test_images``, ``test_inf`` and ``batch_size``.
    """
    saver = tf.train.import_meta_graph(netmodel_path)
    saver.restore(session, param_path)
    # These collections must have been filled during training, e.g. with
    # tf.add_to_collection('test_images', test_images).
    collection_keys = ('test_images', 'test_inf', 'batch_size')
    x, y, batch_size = [tf.get_collection(key)[0] for key in collection_keys]
    return x, y, batch_size


def load_images(data_root):
    """Build graph ops that read and JPEG-decode files from an input queue.

    Args:
        data_root: Passed unchanged to ``tf.train.string_input_producer``;
            presumably a list of image file paths (verify against callers).

    Returns:
        A tuple ``(image, key)`` of the decoded image tensor and the key
        returned by the whole-file reader.
    """
    file_queue = tf.train.string_input_producer(data_root)
    key, contents = tf.WholeFileReader().read(file_queue)
    return tf.image.decode_jpeg(contents), key


def test(data_root="data/race/cropbrown"):
    """Classify every image in ``data_root`` with the saved race model.

    The graph and parameters are loaded from ``model/model_ori_race.ckpt``.
    Each image is resized to 76x76, fed through the network one at a time,
    and the predicted class name is printed.

    Args:
        data_root: Directory whose files are classified.
    """
    image_filenames = [(data_root + '/' + i) for i in os.listdir(data_root)]

    race_listsrc = ['black', 'brown', 'white', 'yellow']
    with tf.Session() as session:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            x, y, batch_size = load_model(
                session,
                os.path.join("model", 'model_ori_race.ckpt.meta'),
                os.path.join("model", 'model_ori_race.ckpt'))
            predict_label = tf.cast(tf.argmax(y, 1), tf.int32)
            print(x.get_shape())
            for imgf in image_filenames:
                image = cv2.imread(imgf)
                if image is None:
                    # cv2.imread returns None for files it cannot decode
                    # (e.g. non-image files in the directory).
                    print("skip unreadable file: %s" % imgf)
                    continue
                image = cv2.resize(image, (76, 76)).reshape((1, 76, 76, 3))
                print("cv shape: %s" % (image.shape,))

                y_np = session.run(predict_label,
                                   feed_dict={x: image, batch_size: 1})
                # One image is fed, so the prediction array holds a single
                # entry. Convert it to int because a list cannot be indexed
                # with an array.
                print(race_listsrc[int(y_np[0])])
        finally:
            # The queue threads must be stopped and joined, otherwise the
            # session reports errors on shutdown.
            coord.request_stop()
            coord.join(threads)


4、移植阶段
(1) 一个算法经过实验阶段后，接着就要进入移植商用，因此接着需要采用tensorflow的C API函数，
直接进行预测推理。首先我们先把tensorflow编译成链接库，然后编写cmake，调用tensorflow链接库：

bazel build -c opt //tensorflow:libtensorflow.so

在bazel-bin/tensorflow目录下会生成libtensorflow.so文件
5、C++ API调用、cmake编写：
三、熟悉常用API
1、LSTM使用

import tensorflow as tf

# Illustrative fragment: lstm_size, batch_size, words_in_dataset, softmax_w,
# softmax_b, loss_function and target_words are not defined here and must be
# supplied by the surrounding program.
rnn_cell = tf.nn.rnn_cell

# Create one LSTM cell whose hidden layer has lstm_size units.
lstm = rnn_cell.BasicLSTMCell(lstm_size)

# Hidden state carried along the sequence.
state = tf.zeros([batch_size, lstm.state_size])

loss = 0.0
for current_batch_of_words in words_in_dataset:
    # The cell returns the hidden-layer output and the updated state.
    output, state = lstm(current_batch_of_words, state)
    logits = tf.matmul(output, softmax_w) + softmax_b  # matrix product
    probabilities = tf.nn.softmax(logits)  # softmax output
    loss += loss_function(probabilities, target_words)

1、one-hot函数：
# one_hot turns integer training labels directly into one-hot vectors, e.g.
# for use in a cross-entropy loss.
import tensorflow as tf

a = tf.convert_to_tensor([[1], [2], [4]])
b = tf.one_hot(a, 5)

>> b的值为

[[[0.  1.  0.  0.  0.]]

 [[0.  0.  1.  0.  0.]]

 [[0.  0.  0.  0.  1.]]]
2、assign_sub
import tensorflow as tf

x = tf.Variable(10, name="x")
# Evaluating the op returned by x.assign_sub also changes x itself.
sub = x.assign_sub(3)
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init_op)
    # Single-argument print calls behave the same on Python 2 and 3.
    print(sub.eval())
    print(x.eval())
可以看到输出 sub = x = 7

采用state_ops的assign_sub也是同样 sub = x = 7，
也就是说assign函数返回结果值的同时，变量本身的值也会被改变。
3、变量查看
 # 查看所有的变量  
# Print the name of every variable returned by tf.all_variables().
for l in tf.all_variables():
    print(l.name)
4、slice函数：

import cv2
import tensorflow as tf

# tf.slice can cut a rectangular sub-image out of an image tensor. begin and
# size follow the tensor's axis order (row, column, channel), so the
# rectangle is given as begin=(miny, minx, 0) and size=(height, width, -1).
# A size of -1 keeps everything remaining along that axis.
minx = 20
miny = 30
height = 100
width = 200

image = tf.placeholder(dtype=tf.uint8, shape=(386, 386, 3))
rect_image = tf.slice(image, (miny, minx, 0), (height, width, -1))

# Reference crop done with plain array slicing, for visual comparison.
cvimage = cv2.imread("1.jpg")
cv2.imshow("cv2", cvimage[miny:(miny + height), minx:(minx + width), :])

with tf.Session() as sess:
    tfimage = sess.run([rect_image], {image: cvimage})
    cv2.imshow('tf', tfimage[0])
cv2.waitKey()

5、（截断）正态分布随机初始化

tf.truncated_normal

6、打印操作运算所在的硬件设备信息

tf.ConfigProto(log_device_placement=True)
7、变量域名的reuse：
import tensorflow as tf

# Demonstrates variable-scope reuse. The second block is expected to fail on
# purpose, because 'v1' is requested with reuse=True but was never created.

# Without reuse, get_variable creates the variable if it does not exist yet.
# NOTE(review): the original note also said an already existing variable is
# returned as a shared variable here; confirm against the get_variable
# documentation, which may instead raise when reuse is not enabled.
with tf.variable_scope('foo'):
    v = tf.get_variable('v', [1])
# With reuse enabled, get_variable reports an error when it does not find an
# already created variable.
with tf.variable_scope('foo', reuse=True):
    v1 = tf.get_variable('v1', [1])
8、allow_soft_placement的使用：allow_soft_placement = True，允许当在代码中指定tf.device设备，如果设备找不到，那么就采用默认的设备。如果该参数设置为false，当设备找不到的时候，会直接编译不通过。
9、batch normalize调用：
# Batch-normalisation call as used inside a layer object: self.epsilon and
# self.name come from the enclosing class, which is not shown here.
tf.contrib.layers.batch_norm(
    x,
    decay=0.9,
    updates_collections=None,
    epsilon=self.epsilon,
    scale=True,
    scope=self.name,
)