Throughout this post I use a varying learning rate with CosineAnnealingLR. For this demo I only use T_max=6 (a small sketch after the training script below shows the per-epoch learning rates this schedule produces).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=6, eta_min=0, last_epoch=-1)
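For reference, the full training script below assumes roughly these imports and helper definitions (a sketch: CaptchaData and calculat_acc are the custom dataset class and accuracy helper defined earlier in this post; their module paths and the restor flag default here are assumptions):
# Imports assumed by the script below (a sketch; adjust module paths to your project).
import time
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import models
from torchvision.transforms import Compose, ToTensor
# CaptchaData (custom Dataset) and calculat_acc (accuracy helper) are defined
# earlier in this post; importing them from a local module is an assumption.
# from my_dataset import CaptchaData
# from my_utils import calculat_acc
restor = False  # resume-from-checkpoint flag referenced below (assumed default)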
batch_size = 50
base_lr = 0.6
max_epoch = 20
model_path = './tttdensenet.pth'
# Convert images to tensors
transforms = Compose([ToTensor()])
train_dataset = CaptchaData('./pic_train2', './answer/answer_train_v2.csv',transform=transforms)
# The DataLoader yields batch_size samples per iteration
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=0,shuffle=True, drop_last=True)
test_data = CaptchaData('./pic_test2','./answer/answer_test_v2.csv',transform=transforms)
test_data_loader = DataLoader(test_data, batch_size=batch_size,num_workers=0, shuffle=True, drop_last=True)
# Build a DenseNet201 model with 180 output classes
cnn = models.densenet201(num_classes=180)
if torch.cuda.is_available():
    cnn.cuda()
if restor:
    cnn.load_state_dict(torch.load(model_path))
# Use SGD with momentum as the optimizer
optimizer = torch.optim.SGD(cnn.parameters(), lr=base_lr, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=6, eta_min=0, last_epoch=-1)
criterion = nn.MultiLabelSoftMarginLoss()
acc_history_train = []
loss_history_train = []
loss_history_test = []
acc_history_test = []
for epoch in range(max_epoch):
    start_ = time.time()
    loss_history = []
    acc_history = []
    cnn.train()
    for img, target in train_data_loader:
        img = Variable(img)
        target = Variable(target)
        if torch.cuda.is_available():
            img = img.cuda()
            target = target.cuda()
        output = cnn(img)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = calculat_acc(output, target)
        acc_history.append(float(acc))
        loss_history.append(float(loss))
    # Remember to add this: step the scheduler once per epoch
    scheduler.step()
    print('train_loss: {:.4}|train_acc: {:.4}'.format(
        torch.mean(torch.Tensor(loss_history)),
        torch.mean(torch.Tensor(acc_history)),
    ))
    acc_history_train.append((torch.mean(torch.Tensor(acc_history))).float())
    loss_history_train.append((torch.mean(torch.Tensor(loss_history))).float())

    # Evaluate on the test set
    loss_history = []
    acc_history = []
    cnn.eval()
    for img, target in test_data_loader:
        img = Variable(img)
        target = Variable(target)
        if torch.cuda.is_available():
            img = img.cuda()
            target = target.cuda()
        output = cnn(img)
        loss = criterion(output, target)  # test loss for this batch
        acc = calculat_acc(output, target)
        acc_history.append(float(acc))
        loss_history.append(float(loss))
    print('test_loss: {:.4}|test_acc: {:.4}'.format(
        torch.mean(torch.Tensor(loss_history)),
        torch.mean(torch.Tensor(acc_history)),
    ))
    acc_history_test.append((torch.mean(torch.Tensor(acc_history))).float())
    loss_history_test.append((torch.mean(torch.Tensor(loss_history))).float())
    print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
    print("========================================")
    torch.save(cnn.state_dict(), model_path)
# Plot the accuracy learning curves
acc = acc_history_train
epoches = range(1, len(acc) + 1)
val_acc = acc_history_test
plt.plot(epoches, acc, 'b', label='Training acc')
plt.plot(epoches, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend(loc='lower right')
plt.grid()
# Save the accuracy curve figure
plt.savefig('./acc.png')
plt.show()
# Plot the loss learning curves
loss = loss_history_train
val_loss = loss_history_test
plt.plot(epoches, loss, 'b', label='Training loss')
plt.plot(epoches, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc='upper right')
plt.grid()
# Save the loss curve figure
plt.savefig('loss.png')
plt.show()
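To see what CosineAnnealingLR with T_max=6 actually does over the 20 training epochs, here is a small standalone sketch (independent of the script above; get_last_lr() needs a reasonably recent PyTorch) that just prints the scheduled learning rate per epoch:
import torch

# Standalone sketch: print the learning rate that CosineAnnealingLR produces
# each epoch, using the same base_lr=0.6 and T_max=6 as above (dummy parameter only).
dummy = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([dummy], lr=0.6, momentum=0.9)
sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=6, eta_min=0)
for epoch in range(20):
    print('epoch {:2d} | lr {:.4f}'.format(epoch, sched.get_last_lr()[0]))
    sched.step()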
Adam
Only the optimizer is changed to Adam; everything else stays the same.
# The only line that needs to change: swap the optimizer to Adam.
optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
Learning curves
Paired with CosineAnnealing, the accuracy improves slightly.
Adadelta
optimizer = torch.optim.Adadelta(cnn.parameters(), lr=base_lr, rho=0.9, eps=1e-06, weight_decay=0)
Learning curves
Convergence was already slow with a fixed learning rate, and with CosineAnnealing it appears even slower, so a decaying learning rate may be a better fit here. Also, to save time I only trained for 20 or 40 epochs; with more epochs you can increase T_max, and I believe both accuracy and convergence would improve considerably.
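As a sketch of that learning-rate-decay suggestion, the cosine scheduler line could be swapped for a plain step decay (step_size and gamma here are illustrative assumptions, not values tuned in this post):
# Illustrative alternative to the cosine schedule: halve the LR every 10 epochs
# (step_size and gamma are assumed example values, not tuned for this task).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)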
Adagrad
optimizer = torch.optim.Adagrad(cnn.parameters(), lr=0.01, lr_decay=0, weight_decay=0)
Learning curves
Accuracy is slightly lower than with a fixed learning rate. But this is only a demo; normally you would run more epochs and experiment with the initial learning rate and other hyperparameters, both of which should help.
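A minimal sketch of that suggestion, trying a few initial learning rates for Adagrad (the candidate values are assumptions; in practice each trial starts from a freshly initialized model and re-runs the training loop above):
# Illustrative sweep over initial learning rates for Adagrad (values are assumptions).
for trial_lr in (0.1, 0.01, 0.001):
    cnn = models.densenet201(num_classes=180)  # fresh model per trial
    optimizer = torch.optim.Adagrad(cnn.parameters(), lr=trial_lr, lr_decay=0, weight_decay=0)
    # ... re-run the training loop above and compare validation accuracy ...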
Adamax
optimizer = torch.optim.Adamax(cnn.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
ASGD
optimizer = torch.optim.ASGD(cnn.parameters(), lr=0.8, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
Learning curves
ASGD already converges slowly to begin with; even with 60 epochs here, the loss drops even more slowly and the result is no better than with a fixed learning rate. It may be worth trying a decaying learning rate instead of a cyclic schedule.
RMSprop
Everything is left at the default settings.
optimizer = torch.optim.RMSprop(cnn.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
With this optimizer the accuracy oscillates quite a bit, but paired with CosineAnnealing it still ends up training to a decent accuracy.
SGD+momentum
Adam
Adadelta
Adagrad
Adamax
ASGD
RMSprop
After pairing these optimizers with the CosineAnnealing (cosine annealing) learning-rate schedule, some improved in accuracy while others actually dropped, so it is worth experimenting. For the 玉山 competition I personally used SGD + momentum with CosineAnnealing, and it worked quite well.
Optimizers and learning-rate schedules can behave differently on different datasets; keep experimenting and you will train the model that works best for you.