机器之心发布
向往深度学习技术,可是深度学习框架太难学怎么办?百度倾心打造飞桨框架高层 API,零基础也能轻松上手深度学习,一起来看看吧!另:文末有福利,一定要看完呦~
import paddle
from paddle.vision.transforms import Compose, Normalize
from paddle.vision.datasets import MNIST
import paddle.nn as nn
# Preprocessing: rescale raw pixel values from [0, 255] into [-1, 1].
normalize = Normalize(mean=[127.5], std=[127.5], data_format='CHW')
transform = Compose([normalize])

# Load the MNIST splits, applying the same preprocessing to each.
train_dataset = MNIST(mode='train', transform=transform)
test_dataset = MNIST(mode='test', transform=transform)
# Network definition: a one-hidden-layer MLP mapping a flattened
# 28x28 image to 10 class logits.
mlp_layers = [
    nn.Flatten(),
    nn.Linear(784, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 10),
]
mnist = nn.Sequential(*mlp_layers)

# Wrap the network with the high-level Model API.
model = paddle.Model(mnist)

# Configure training: optimizer, loss function and accuracy metric.
adam = paddle.optimizer.Adam(parameters=model.parameters())
model.prepare(optimizer=adam,
              loss=nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())

# Train for 10 epochs with mini-batches of 64 samples, logging progress.
model.fit(train_dataset, epochs=10, batch_size=64, verbose=1)

# Evaluate on the held-out test split.
model.evaluate(test_dataset, verbose=1)

# Persist the trained model to disk.
model.save('model_path')
from paddle.io import Dataset
class MyDataset(Dataset):
    """Minimal custom dataset.

    Step 1: subclass paddle.io.Dataset; steps 2-4: implement
    __init__, __getitem__ and __len__.
    """

    def __init__(self):
        """Step 2: define how data is read (toy in-memory samples here)."""
        super(MyDataset, self).__init__()
        self.data = [
            ['traindata1', 'label1'],
            ['traindata2', 'label2'],
            ['traindata3', 'label3'],
            ['traindata4', 'label4'],
        ]

    def __getitem__(self, index):
        """Step 3: return the single (sample, label) pair at *index*."""
        sample, label = self.data[index]
        return sample, label

    def __len__(self):
        """Step 4: return the total number of samples."""
        return len(self.data)
# Smoke-test the custom dataset by printing every (sample, label) pair.
train_dataset = MyDataset()
print('=============train dataset=============')
for idx in range(len(train_dataset)):
    sample, tag = train_dataset[idx]
    print(sample, tag)
# Sequential-style network definition: the MLP expressed as an ordered
# pipeline of layers.
pipeline = (
    nn.Flatten(),
    nn.Linear(784, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 10),
)
mnist = nn.Sequential(*pipeline)
# SubClass 方式组网
class Mnist(nn.Layer):
    """SubClass-style network: the same MLP built by subclassing nn.Layer."""

    def __init__(self):
        super(Mnist, self).__init__()
        # Layers are created once here and wired together in forward().
        self.flatten = nn.Flatten()
        self.linear_1 = nn.Linear(784, 512)
        self.linear_2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, inputs):
        """Flatten -> fc -> relu -> dropout -> fc, returning class logits."""
        x = self.flatten(inputs)
        x = self.relu(self.linear_1(x))
        x = self.dropout(x)
        return self.linear_2(x)
import paddle
from paddle.vision.models import resnet18
# Option 1: use a predefined model directly, in one line of code.
# FIX: the variable was misspelled `resnetresnet` (duplicated-name typo);
# it is unused elsewhere, so renaming it is safe.
resnet = resnet18()
# 方式二: 作为主干网络进行二次开发
class FaceNet(paddle.nn.Layer):
    """Option 2: reuse resnet18 as a backbone for secondary development.

    Args:
        num_keypoints: number of (x, y) keypoints to regress.
        pretrained: whether to load pretrained backbone weights.
    """

    def __init__(self, num_keypoints=15, pretrained=False):
        super(FaceNet, self).__init__()
        # The backbone ends in a 1000-way output; two linear layers project
        # it down to 2 * num_keypoints coordinate values.
        self.backbone = resnet18(pretrained)
        self.outLayer1 = paddle.nn.Linear(1000, 512)
        self.outLayer2 = paddle.nn.Linear(512, num_keypoints * 2)

    def forward(self, inputs):
        features = self.backbone(inputs)
        hidden = self.outLayer1(features)
        return self.outLayer2(hidden)
# Build the MLP, wrap it with Model, and print a layer-by-layer summary.
stages = [
    nn.Flatten(),
    nn.Linear(784, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 10),
]
mnist = nn.Sequential(*stages)
model = paddle.Model(mnist)
model.summary()
---------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===========================================================================
Flatten-795 [[32, 1, 28, 28]] [32, 784] 0
Linear-5 [[32, 784]] [32, 512] 401,920
ReLU-3 [[32, 512]] [32, 512] 0
Dropout-3 [[32, 512]] [32, 512] 0
Linear-6 [[32, 512]] [32, 10] 5,130
===========================================================================
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.10
Forward/backward pass size (MB): 0.57
Params size (MB): 1.55
Estimated Total Size (MB): 2.22
---------------------------------------------------------------------------
{'total_params': 407050, 'trainable_params': 407050}
# Wrap the network structure with the Model class.
model = paddle.Model(mnist)
# Prepare for training: optimizer, loss function and accuracy metric.
model.prepare(optimizer=paddle.optimizer.Adam(parameters=model.parameters()),
              loss=paddle.nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())
# Launch training: dataset, epoch count, batch size and log verbosity.
model.fit(train_dataset,
          epochs=10,
          batch_size=64,
          verbose=1)
# Launch evaluation on the test split.
model.evaluate(test_dataset, verbose=1)
# Launch prediction on the test split.
# FIX: predict is an instance method — call it on the `model` instance,
# not on the Model class (`Model.predict(test_dataset)` would fail).
model.predict(test_dataset)
# Wrap and configure the model for the single-batch training/eval/predict APIs.
model = paddle.Model(mnist)
model.prepare(optimizer=paddle.optimizer.Adam(parameters=model.parameters()),
              loss=nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())

# Training-set loader.
train_loader = paddle.io.DataLoader(train_dataset, batch_size=64, shuffle=True)
# Drive training one batch at a time with train_batch.
for images, labels in train_loader():
    model.train_batch([images], [labels])

# Test-set loader.
test_loader = paddle.io.DataLoader(test_dataset, places=paddle.CPUPlace(), batch_size=64, shuffle=True)
# Validate one batch at a time with eval_batch.
for images, labels in test_loader():
    model.eval_batch([images], [labels])

# Predict one batch at a time with predict_batch (labels not needed).
for batch in test_loader():
    model.predict_batch([batch[0]])
# CPU 版
$ pip3 install paddlepaddle==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
# GPU 版
$ pip3 install paddlepaddle_gpu==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
本文为机器之心发布,转载请联系本公众号获得授权。
✄------------------------------------------------
加入机器之心(全职记者 / 实习生):hr@jiqizhixin.com
投稿或寻求报道:content@jiqizhixin.com
广告 & 商务合作:bd@jiqizhixin.com