在金融數據預測中，時間序列資料不能隨機打亂，否則會發生「未來資訊洩漏到過去」的情況：模型的表現看似很好，實際上卻完全不符合真實的交易場景。因此，這裡將原本的 ML 訓練模型改成 Walk Forward Validation 版本。與先前版本的差別在於：原本只對資料做一次訓練／測試切分，調整後則會依時間順序做多次切分。
def train_ml_model_walkforward(X, y, n_splits=5):
    """Train and evaluate a random-forest classifier with walk-forward validation.

    The rows are partitioned with ``TimeSeriesSplit(n_splits=n_splits)``. For
    every split a fresh ``RandomForestClassifier`` is fitted on the training
    rows and scored on the test rows. The per-split sizes, accuracy,
    confusion matrix and classification report are printed, followed by the
    mean accuracy over all splits.

    :param X: Feature data. It is indexed positionally with ``.iloc``, so a
        pandas object is expected. NOTE(review): rows are assumed to already
        be in chronological order; nothing here sorts or checks that.
    :param y: Labels aligned row-for-row with ``X``; also indexed with
        ``.iloc``.
    :param n_splits: Number of walk-forward splits handed to
        ``TimeSeriesSplit`` (this is not a shuffled k-fold).
    :return: List of accuracy scores, one per split, in split order.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits)
    all_scores = []

    splits = tscv.split(X)
    for split_num, (train_index, test_index) in enumerate(splits, start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # A new model per split, so nothing learned on one split carries
        # over to the next; the fixed seed keeps runs reproducible.
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluate on the held-out rows of this split.
        acc = accuracy_score(y_test, y_pred)
        all_scores.append(acc)

        print(f"=== Split {split_num} ===")
        print(f"Train size: {len(train_index)}, Test size: {len(test_index)}")
        print(f"Accuracy: {acc:.4f}")
        print("Confusion Matrix:")
        print(confusion_matrix(y_test, y_pred))
        print("Classification Report:")
        print(classification_report(y_test, y_pred))
        print("\n")

    print("平均準確率:", sum(all_scores) / len(all_scores))
    return all_scores
# -------------------------
# Main program
# -------------------------
if __name__ == "__main__":
    # Fetch the raw data and attach indicator columns in one step.
    # NOTE(review): the arguments look like symbol / timeframe / row limit
    # (BTC/USDT, 1-hour bars, 500 rows) - confirm against fetch_crypto_data.
    market_data = add_indicators(fetch_crypto_data("BTC/USDT", "1h", 500))

    # Build the feature matrix and label vector, then run the
    # walk-forward evaluation over five splits.
    X, y = prepare_ml_data(market_data)
    scores = train_ml_model_walkforward(X, y, n_splits=5)