將prepare_ml_data的df = df.copy()移除
def prepare_ml_data(df, return_threshold=0.003):
    """Build the supervised-learning matrices from an indicator-enriched frame.

    Labels each row by the next bar's simple return: Target = 1 when
    (next close / close) - 1 exceeds ``return_threshold``, else 0.

    Args:
        df: DataFrame containing a 'close' column plus every indicator
            column listed in ``features`` (MA20, MA50, RSI, MACD, ...).
        return_threshold: Minimum forward return for a row to be labeled 1.

    Returns:
        (X, y, data): feature matrix, binary target Series, and the cleaned
        DataFrame (NaN rows dropped, index reset). The caller's ``df`` is
        left unmodified.
    """
    # Derive the label columns without mutating the caller's DataFrame.
    # Writing df['Future_Close'] = ... directly (after the copy was removed)
    # leaked look-ahead columns into the caller's frame; df.assign returns a
    # new frame instead, keeping the input pristine.
    future_close = df['close'].shift(-1)            # next bar's close (NaN on last row)
    forward_return = (future_close / df['close']) - 1
    target = (forward_return > return_threshold).astype(int)

    features = [
        'MA20', 'MA50', 'RSI', 'MACD', 'Signal',
        'BB_Mid', 'BB_Upper', 'BB_Lower',
        'Vol_MA20', 'Vol_MA50',
        'ROC', 'ATR', 'Momentum',
        'Price_vs_MA20', 'Price_vs_MA50'
    ]

    data = df.assign(Future_Close=future_close, Return=forward_return, Target=target)
    # dropna removes the final row (NaN Future_Close) plus any indicator
    # warm-up rows that still carry NaNs in the feature columns.
    data = data.dropna().reset_index(drop=True)
    return data[features], data['Target'], data
優勢: 提高代碼執行效率,避免不必要的數據複製。注意: 移除複製後,函式會直接在呼叫端傳入的 DataFrame 上新增 Future_Close / Return / Target 欄位,屬於副作用;需確認呼叫端可以接受原始資料被修改,否則應保留複製或改用 df.assign()。
在 walk_forward_train 函式中,當 folds 列表不為空時,加入一個檢查,確保 np.mean(scores) 不會因為 scores 為空而報錯
# -------------------------
# Step 4: 滑動視窗 Walk-forward 訓練 (修正版)
# -------------------------
def walk_forward_train(X, y, df, train_window=2000, test_window=400, step=400, model_cls=RandomForestClassifier, model_kwargs=None):
    """Sliding-window walk-forward training.

    Trains a fresh classifier on each rolling ``train_window`` slice and
    evaluates it on the following ``test_window`` slice, advancing the
    window by ``step`` rows per fold.

    Args:
        X: Feature matrix (DataFrame), row-aligned with ``y`` and ``df``.
        y: Binary target Series aligned with ``X``.
        df: Full DataFrame, used only for plotting the last fold.
        train_window: Number of rows in each training slice.
        test_window: Number of rows in each evaluation slice.
        step: Rows to advance the window between folds.
        model_cls: Classifier class instantiated once per fold.
        model_kwargs: Keyword args for ``model_cls``; defaults to a
            200-tree RandomForest with a fixed seed.

    Returns:
        (last_model, last_test_index, last_y_true, last_y_pred, folds), or
        (None, None, None, None, None) when the data is too short for even
        one fold.
    """
    # Hoisted to the top of the function instead of mid-body between the
    # statistics and the report.
    from sklearn.metrics import classification_report

    if model_kwargs is None:
        model_kwargs = {"n_estimators": 200, "random_state": 42}

    n = len(X)
    folds = []
    scores = []

    start = 0
    # Each iteration needs train_window + test_window rows ahead of `start`.
    while start + train_window + test_window <= n:
        train_idx = list(range(start, start + train_window))
        test_idx = list(range(start + train_window, start + train_window + test_window))
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = model_cls(**model_kwargs)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        scores.append(acc)
        # Per-fold detail is kept in `folds`; no per-fold printing to keep
        # the output compact.
        folds.append({
            "train_index": train_idx,
            "test_index": test_idx,
            "model": model,
            "y_true": y_test,
            "y_pred": y_pred,
            "acc": acc
        })
        start += step

    print(f"\nWalk-forward 訓練完成。共 {len(scores)} 個 Fold。")

    # Guard clause first: with zero folds, np.mean(scores) would warn/NaN and
    # there is nothing to report or plot. Bail out before any statistics.
    if not folds:
        print("📊 平均準確率: N/A (無 Fold 執行)")
        print("❌ 錯誤:Walk-forward 沒有執行任何 Fold。請檢查資料量是否足夠。")
        return None, None, None, None, None

    avg_acc = np.mean(scores)
    print(f"📊 平均準確率: {avg_acc:.4f}")

    # The "last" results are simply the final fold's entries — no need to
    # shadow them on every loop iteration.
    last_fold = folds[-1]
    last_model = last_fold["model"]
    last_test_index = last_fold["test_index"]
    last_y_true = last_fold["y_true"]
    last_y_pred = last_fold["y_pred"]

    # Classification report quantifies per-class performance under imbalance.
    if len(last_y_pred) > 0:
        print("\n=== 最後一個 Walk-forward 區段的詳細分類報告 (Target=1: 預測漲幅>0.3%) ===")
        print(classification_report(last_y_true, last_y_pred,
                                    target_names=['Target=0 (Down/Small)', 'Target=1 (Up)'],
                                    zero_division=0))

    # Plot predictions for the most recent evaluation window.
    plot_predictions(df, last_y_true, last_y_pred, last_test_index, f"Predictions vs Actual (Latest {test_window} bars)")
    return last_model, last_test_index, last_y_true, last_y_pred, folds
優勢: 雖然 if not folds: 已經涵蓋了沒有任何 Fold 的情況,但在呼叫 np.mean(scores) 之前先確認 scores 非空,可以確保 np.mean 的輸入始終有效(避免對空列表取平均產生警告與 NaN),提高代碼的魯棒性;更理想的寫法是把空資料的提前返回放在統計輸出之前,讓兩個檢查合而為一。