簡介
Residual / Bottleneck Block
短路連線(shortcuts)
ResNet解決了甚麼問題?
ResNet的出現,解決了因為網路層數加深而訓練效果變差的問題。為何訓練不起來?當網路層數越深,發生梯度爆炸或梯度消失的機率就越高;透過Batch Normalization的方法,已部分緩解了這個問題。
但上述方法仍無法完全解決此問題,因此論文提出了恆等映射(Identity mapping)的方法,使網路層數增加時,訓練誤差也不會隨之增加。詳細可以參考這篇文章(傳送門)。
import的套件
import torch
import torch.nn as nn
from torch.autograd import Variable
from dataset import CaptchaData
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor,ColorJitter,RandomRotation,RandomAffine,Resize,Normalize,CenterCrop,RandomApply,RandomErasing
import torchvision.models as models
import time
import copy
dataset載入以及DataLoader
# Training set: captcha images under ./mask_2/train with the augmentation
# pipeline `transforms` (defined elsewhere in the article).
train_dataset = CaptchaData('./mask_2/train',
transform=transforms)
# drop_last=True discards the final incomplete batch; pin_memory=True speeds
# up host-to-GPU copies; num_workers=0 loads data in the main process.
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=0,
shuffle=True, drop_last=True,pin_memory=True)
# Test set: uses transforms_1 (no augmentation — ToTensor + Normalize only).
test_data = CaptchaData('./mask_2/test',
transform=transforms_1)
# NOTE(review): shuffle=True and drop_last=True on the *test* loader mean the
# reported test metrics skip the last partial batch — confirm this is intended.
test_data_loader = DataLoader(test_data, batch_size=batch_size,
num_workers=0, shuffle=True, drop_last=True,pin_memory=True)
transforms的設置
# Data-augmentation pipeline. These transforms operate on PIL images, so the
# RandomApply group runs before ToTensor().
# FIX: torchvision renamed RandomAffine's `fillcolor` argument to `fill`
# (deprecated in 0.9.0 and later removed); using `fill` works on current
# torchvision and fills the exposed border with white.
transform_set = [
    RandomRotation(degrees=10, fill=(255, 255, 255)),
    RandomAffine(degrees=(-10, 10), translate=(0.2, 0.2), fill=(255, 255, 255)),
    # scale=(0.8, 0.8) is a *fixed* 0.8x zoom, not a range — kept as written.
    RandomAffine(degrees=(-10, 10), scale=(0.8, 0.8), fill=(255, 255, 255)),
    RandomAffine(degrees=(-10, 10), shear=(0, 0, 0, 20), fill=(255, 255, 255)),
]

# Training transforms: apply the augmentation set with probability 0.7,
# then convert to tensor and normalize each channel to roughly [-1, 1].
transforms = Compose([
    RandomApply(transform_set, p=0.7),
    ToTensor(),
    Normalize((0.5,), (0.5,)),
])

# Test transforms: no augmentation.
transforms_1 = Compose([
    ToTensor(),
    Normalize((0.5,), (0.5,)),
])
計算準確度
def calculat_acc(output, target):
    """Compute per-character accuracy between model output and one-hot target.

    Both tensors are reshaped to (-1, 800) — one row per character position,
    800 classes per character — and compared by their argmax class indices.

    Args:
        output: model logits, any shape reshapeable to (-1, 800).
        target: one-hot (or score) labels with the same total size.

    Returns:
        Fraction of positions whose predicted class matches, as a float.
    """
    output = output.view(-1, 800)
    target = target.view(-1, 800)
    # softmax is monotonic, so argmax of the logits equals argmax of the
    # softmax — the original softmax call was redundant and is dropped.
    pred = torch.argmax(output, dim=1)
    truth = torch.argmax(target, dim=1)
    # Vectorized elementwise comparison replaces the original Python loop
    # that built a 0/1 list row by row.
    return (pred == truth).float().mean().item()
預訓練模型
# ResNet-152 with an 800-way classification head.
# NOTE(review): no pretrained weights are requested here, so despite the
# section heading this model starts from random initialization.
model = models.resnet152(num_classes=800)
儲存best_model(test_score最高的模型)
# Keep a deep copy of the model whenever test accuracy ties or beats the best
# seen so far, but only after `min_epoch` warm-up epochs have passed.
if epoch > min_epoch and acc_best <= acc:
acc_best = acc
best_model = copy.deepcopy(model)
完整的code
import torch
import torch.nn as nn
from torch.autograd import Variable
from dataset import CaptchaData
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor,ColorJitter,RandomRotation,RandomAffine,Resize,Normalize,CenterCrop,RandomApply,RandomErasing
import torchvision.models as models
import time
import copy
import matplotlib.pyplot as plt
# Training configuration.
batch_size = 32
base_lr = 0.01  # NOTE(review): unused — the optimizer in train() hard-codes lr=0.1
max_epoch = 25
model_path = './resnet_mask2.pth'  # where the latest state_dict is saved
restor = False  # (sic, "restore") resume from model_path when True
def calculat_acc(output, target):
    """Compute per-character accuracy between model output and one-hot target.

    Both tensors are reshaped to (-1, 800) — one row per character position,
    800 classes per character — and compared by their argmax class indices.

    Args:
        output: model logits, any shape reshapeable to (-1, 800).
        target: one-hot (or score) labels with the same total size.

    Returns:
        Fraction of positions whose predicted class matches, as a float.
    """
    output = output.view(-1, 800)
    target = target.view(-1, 800)
    # softmax is monotonic, so argmax of the logits equals argmax of the
    # softmax — the original softmax call was redundant and is dropped.
    pred = torch.argmax(output, dim=1)
    truth = torch.argmax(target, dim=1)
    # Vectorized elementwise comparison replaces the original Python loop
    # that built a 0/1 list row by row.
    return (pred == truth).float().mean().item()
def train():
    """Train ResNet-152 on the captcha dataset.

    Tracks per-epoch train/test loss and accuracy, saves the latest
    state_dict to `model_path`, saves the best test-accuracy model to
    './resnet152_mask2.pth', and writes accuracy/loss learning curves
    to PNG files.
    """
    acc_best = 0
    best_model = None
    min_epoch = 1  # epochs to skip before best-model tracking starts

    # Augmentations operate on PIL images, so they must run BEFORE ToTensor().
    # FIX: the original placed RandomApply after ToTensor(), which conflicts
    # with the PIL-based fill arguments (the standalone snippet earlier in
    # the article used the order restored here). `fillcolor` is also renamed
    # to `fill` per current torchvision.
    transform_set = [
        RandomRotation(degrees=10, fill=(255, 255, 255)),
        RandomAffine(degrees=(-10, 10), translate=(0.2, 0.2), fill=(255, 255, 255)),
        RandomAffine(degrees=(-10, 10), scale=(0.8, 0.8), fill=(255, 255, 255)),
        RandomAffine(degrees=(-10, 10), shear=(0, 0, 0, 20), fill=(255, 255, 255)),
    ]
    transforms = Compose([
        RandomApply(transform_set, p=0.7),
        ToTensor(),
        Normalize((0.5,), (0.5,)),
    ])
    transforms_1 = Compose([
        ToTensor(),
        Normalize((0.5,), (0.5,)),
    ])

    train_dataset = CaptchaData(r'C:\Users\Frank\PycharmProjects\practice\mountain\清洗標籤final\train_nomask',
                                transform=transforms)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=0,
                                   shuffle=True, drop_last=True, pin_memory=True)
    test_data = CaptchaData(r'C:\Users\Frank\PycharmProjects\practice\mountain\清洗標籤final\test_nomask',
                            transform=transforms_1)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=0, shuffle=True, drop_last=True, pin_memory=True)
    print('load.........................')

    model = models.resnet152(num_classes=800)
    # Device handling replaces the per-batch .cuda() calls and the
    # long-deprecated Variable wrapper.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    if restor:
        model.load_state_dict(torch.load(model_path))

    # NOTE(review): lr=0.1 is hard-coded; the module-level base_lr (0.01) is
    # never used. Kept as-is to preserve the original training behavior.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.8)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=10, eta_min=0, last_epoch=-1, verbose=False)
    criterion = nn.CrossEntropyLoss()

    acc_history_train, loss_history_train = [], []
    acc_history_test, loss_history_test = [], []

    for epoch in range(max_epoch):
        start_ = time.time()

        # ---- training pass ----
        model.train()
        loss_history, acc_history = [], []
        for img, target in train_data_loader:
            img = img.to(device)
            # Targets arrive one-hot encoded. target.long() replaces the
            # deprecated torch.tensor(target, dtype=torch.long) re-wrap,
            # which copies and emits a UserWarning.
            target = target.to(device).long()
            output = model(img)
            # CrossEntropyLoss expects class indices, hence the argmax.
            loss = criterion(output, torch.max(target, 1)[1])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            acc_history.append(float(calculat_acc(output, target)))
            loss_history.append(float(loss))
        scheduler.step()

        train_loss = sum(loss_history) / max(len(loss_history), 1)
        train_acc = sum(acc_history) / max(len(acc_history), 1)
        print('train_loss: {:.4}|train_acc: {:.4}'.format(train_loss, train_acc))
        acc_history_train.append(train_acc)
        loss_history_train.append(train_loss)

        # ---- evaluation pass ----
        model.eval()
        loss_history, acc_history = [], []
        with torch.no_grad():  # no gradients needed during evaluation
            for img, target in test_data_loader:
                img = img.to(device)
                target = target.to(device).long()
                output = model(img)
                # BUG FIX: the original never computed a test loss — it
                # re-appended the last *training* batch loss for every test
                # batch, so the reported test_loss curve was meaningless.
                loss = criterion(output, torch.max(target, 1)[1])
                acc = calculat_acc(output, target)
                # Snapshot the model whenever test accuracy ties or beats
                # the best seen so far (after the warm-up epochs).
                if epoch > min_epoch and acc_best <= acc:
                    acc_best = acc
                    best_model = copy.deepcopy(model)
                acc_history.append(float(acc))
                loss_history.append(float(loss))

        test_loss = sum(loss_history) / max(len(loss_history), 1)
        test_acc = sum(acc_history) / max(len(acc_history), 1)
        print('test_loss: {:.4}|test_acc: {:.4}'.format(test_loss, test_acc))
        acc_history_test.append(test_acc)
        loss_history_test.append(test_loss)

        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        print("==============================================")
        # Checkpoint the latest weights every epoch.
        torch.save(model.state_dict(), model_path)

    # Guard against best_model never being set (originally saved None).
    if best_model is not None:
        # Saved as a whole pickled module, as in the original.
        torch.save(best_model, './resnet152_mask2.pth')

    # ---- accuracy learning curve ----
    epoches = range(1, len(acc_history_train) + 1)
    plt.figure()  # fresh figure so the two curve sets don't overlap
    plt.plot(epoches, acc_history_train, 'b', label='Training acc')
    plt.plot(epoches, acc_history_test, 'r', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend(loc='lower right')
    plt.grid()
    # Save the accuracy learning curve.
    plt.savefig('./acc_ResNet152.png')
    plt.show()

    # ---- loss learning curve ----
    plt.figure()
    plt.plot(epoches, loss_history_train, 'b', label='Training loss')
    plt.plot(epoches, loss_history_test, 'r', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend(loc='upper right')
    plt.grid()
    # Save the loss learning curve.
    plt.savefig('./loss_ResNet152.png')
    plt.show()
# Script entry point: run training only when executed directly.
# (The redundant trailing `pass` was removed.)
if __name__ == "__main__":
    train()
學習曲線
準確度
總結