Pytorch深度学习实现手势数字识别项目,因为其数据规整,场景简单,是一个经典的CNN模型入门项目,本文从代码角度,将项目分为四步步骤:数据加载、模型设计、训练模型、加载测试,以下是代码具体代码实现。
代码示例
1、数据加载
from torch.utils import data import os, glob from PIL import Image class GestureDataset(data.Dataset): def __init__(self, root, train=True, transform=None): self.data = self._read_file(root, train) self.transfrom = transform def __len__(self): return len(self.data) def __getitem__(self, index): file_path, label = self.data[index] img = Image.open(file_path) if self.transfrom: img = self.transfrom(img) return img, label # 读取文件 def _read_file(self, root, train): dir_name = 'train' if train else 'test' dir_path = os.path.join(root, dir_name) lst = [] if train: for dir in self._list_file(dir_path): for file_path in self._list_file(dir): # ./datas/train/9/IMG_5805.JPG label = file_path.split('/')[-2] lst.append((file_path, label)) else: for file_path in self._list_file(dir_path): # ./datas/test/example_9.JPG label = file_path[-5] lst.append((file_path, label)) return lst def _list_file(self, dir_path): return glob.glob(dir_path + '/*')2、模型设计
import torch import torch.nn as nn class GestureModule(nn.Module): def __init__(self, mudule_path): self.mudule_path = mudule_path super().__init__() self.conv1 = nn.Sequential( nn.Conv2d(3, 64, 9, 1, 4), #(64, 100, 100) nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2) #(64, 50, 50) ) self.conv2 = nn.Sequential( nn.Conv2d(64, 128, 5, 1, 2), #(64, 50, 50) nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2) #(128, 25, 25) ) self.conv3 = nn.Sequential( nn.Conv2d(128, 256, 5, 1, 2), #(256, 25, 25) nn.BatchNorm2d(256), nn.Dropout2d(p=0.4), nn.ReLU(), nn.MaxPool2d(2) #(256, 12, 12) ) self.out = nn.Linear(256 * 12 * 12, 10) def forward(self, x): x = self.conv1(x) x = self.conv2(x) x = self.conv3(x) x = x.view(x.shape[0], -1) return self.out(x) def save(self): torch.save(self, self.mudule_path) @staticmethod def load(mudule_path): if os.path.exists(mudule_path): return torch.load(mudule_path)
3、模型训练
import numpy as np from torchvision import transforms from torch.utils.data import DataLoader def train(module_path): #加载训练数据 trans = transforms.Compose([transforms.ToTensor(), transforms.Resize([100, 100])]) root = './datas' ds = GestureDataset(root, transform=trans) # 加载训练数据 loader = DataLoader(ds, batch_size=50, shuffle=True) # 实例化模型 module = GestureModule(module_path) # 训练模型 optimizer = torch.optim.Adam(module.parameters(), lr=0.005) loss_fn = nn.CrossEntropyLoss() for epoch in range(100): min_loss = 1000 for x, y in loader: p_y = module(x) # y = np.array(y, dtype='float') # # 需要注意长度问题 # y = F.one_hot(torch.from_numpy(y).long(), 10) # loss = loss_fn(p_y.float(), y.float()) y = torch.tensor(np.array(y, dtype='float')).long() loss = loss_fn(p_y, y) print('epoch:', epoch, ' loss:', loss.item()) # 判断loss,更新模型文件 if float(loss.item()) < min_loss: module.save() # 反向传播 optimizer.zero_grad() loss.backward() optimizer.step()
4、模型测试
if __name__ == '__main__': module_path = './module.pkl' if not os.path.exists(module_path): train(module_path) module = GestureModule.load(module_path) # 预测 trans = transforms.Compose([transforms.ToTensor(), transforms.Resize([100, 100])]) root = './datas' test_ds = GestureDataset(root, train=False, transform=trans) test_loader = DataLoader(test_ds, shuffle=True) t_cnt = len(test_loader) cnt = 0 for x, y in test_loader: pred_y = module(x) print('真实值:', y[0], '预测值:', pred_y[0].argmax().item()) if int(pred_y[0].argmax().item()) == int(y[0]): cnt += 1 print('正确率:', cnt / t_cnt)
本文为 陈华 原创,欢迎转载,但请注明出处:http://edu.ichenhua.cn/read/245