Aside from BLEU, there is no good metric for judging how well a GPT model has trained. In our experience, however, BLEU tracks the loss fairly closely anyway, so in both the pretraining and SFT stages we simply use the validation loss (valid_loss) to compare models.
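For reference, BLEU can be computed with a library such as sacrebleu. This is a minimal sketch only; the generate_text helper and the val_prompts/val_answers lists are hypothetical stand-ins, not part of this project:

import sacrebleu

# Hypothetical: decode the model's output for each validation prompt.
hypotheses = [generate_text(model, prompt) for prompt in val_prompts]
# corpus_bleu takes a list of hypothesis strings and a list of
# reference streams (here one reference per hypothesis).
references = [val_answers]

bleu = sacrebleu.corpus_bleu(hypotheses, references)
print(bleu.score)  # 0-100, higher is better

The validation pass used here is much simpler: average the loss over the validation set and keep the checkpoint with the lowest value.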
import numpy as np
import torch

# model, raw_model, val_loader, device, ctx, logger, save_dir and
# best_val_loss are all defined elsewhere in the training script.
@torch.no_grad()
def valid_epoch(epoch):
    global best_val_loss
    losses = []
    model.eval()
    for X, Y in val_loader:
        X = X.to(device)
        Y = Y.to(device)
        with ctx:  # autocast (or no-op) context set up by the training script
            logits, loss = model(X, Y)
        losses.append(loss.item())
    model.train()
    val_loss = np.mean(losses)
    logger.info('valid loss = {:.4f}'.format(val_loss))
    if val_loss < best_val_loss:
        # Keep only the checkpoint with the lowest validation loss so far.
        best_val_loss = val_loss
        logger.info('best val_loss: {} best_epoch: {}'.format(best_val_loss, epoch))
        torch.save(raw_model.state_dict(), '{}/best.pth'.format(save_dir))
    return val_loss
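A sketch of how valid_epoch might be wired into the outer training loop; train_epoch and max_epoch are assumptions about the surrounding script, not shown in the original:

best_val_loss = float('inf')  # initialize before training so the first epoch always checkpoints
for epoch in range(max_epoch):
    train_epoch(epoch)             # hypothetical: one pass over the training data
    val_loss = valid_epoch(epoch)  # evaluates, logs, and saves best.pth when improved

Because valid_epoch updates best_val_loss as a global and writes best.pth itself, the loop body stays minimal: the same pattern works unchanged for both the pretrain and SFT runs.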