因為目前每執行一次程式碼,輸出的東西太多、有點雜,所以我對程式碼進行了修改:我希望只顯示最後一個 split 的圖和最後的 Predictions vs Actual 圖,並顯示時間較長的資金曲線圖。
所以先改進了抓資料的時間
def fetch_crypto_data(symbol="BTC/USDT", timeframe="1h", limit_total=8000, force_reload=False):
先檢查快取檔
if os.path.exists(filename) and not force_reload:
print(f"✅ 讀取快取檔案 {filename}")
df = pd.read_csv(filename)
如果有了就直接讀沒有的話再去抓
ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)
接下來將抓到的資料計算成技術指標,讓模型學習市場特徵。
再來建立訓練用的特徵和標籤。
最後來做模型訓練與回測。
視覺化的預測結果
# Draw the close price of the test window and overlay the model's up/down calls.
plt.figure(figsize=(12, 6))
times, closes = df_test["timestamp"], df_test["close"]
plt.plot(times, closes, label="Close Price", color="black", alpha=0.6)

# One scatter layer per predicted class: (class value, legend label, colour, marker).
for pred_value, legend_label, colour, glyph in (
    (1, "Pred Up", "green", "^"),
    (0, "Pred Down", "red", "v"),
):
    selected = df_test["Pred"] == pred_value
    plt.scatter(times[selected], closes[selected], label=legend_label, color=colour, marker=glyph)

plt.title(title)
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.xticks(rotation=45)  # slant the time labels
plt.tight_layout()
plt.show()
模擬策略在歷史資料中實際交易,並畫出資金曲線。
# Bar-by-bar simulation of the strategy over df_test, followed by summary metrics.
#
# Reads the "close", "RSI", "Pred" and "ATR" columns of df_test and the settings
# initial_capital, position_size_ratio, fee_rate, atr_multiplier and
# take_profit_ratio. Writes an "Equity" column back to df_test and produces
# total_return, max_drawdown and win_rate (all in percent).
#
# `balance` is the total account equity. Opening a position only costs the entry
# fee; the stake (`entry_capital`) is never taken out of `balance`, so closing a
# position must add just the profit/loss minus the exit fee. Adding the stake
# back on exit would credit it twice and inflate the curve on every closed trade.
balance = initial_capital
equity_curve = [balance]
trades = []  # fractional return of each closed trade, before fees
position, entry_price, entry_capital = None, 0, 0

# Pull the columns out once instead of doing four .iloc lookups per bar.
closes = df_test["close"].to_numpy()
rsis = df_test["RSI"].to_numpy()
preds = df_test["Pred"].to_numpy()
atrs = df_test["ATR"].to_numpy()

for i in range(1, len(df_test)):
    price_now = closes[i]
    rsi = rsis[i]
    pred = preds[i - 1]  # trade on the previous bar's prediction
    atr = atrs[i]

    if position is None:
        # Enter only when the prediction and the RSI side of 50 agree.
        if pred == 1 and rsi > 50:
            position = "long"
        elif pred == 0 and rsi < 50:
            position = "short"
        if position is not None:
            entry_price = price_now
            entry_capital = balance * position_size_ratio
            balance -= entry_capital * fee_rate  # entry fee
    else:
        # Signed return since entry: positive means the trade is in profit.
        direction = 1 if position == "long" else -1
        change = direction * (price_now - entry_price) / entry_price
        # Stop distance is the current bar's ATR scaled by atr_multiplier,
        # expressed as a fraction of the entry price.
        stop_loss = -atr_multiplier * atr / entry_price
        if change <= stop_loss or change >= take_profit_ratio:
            pnl = entry_capital * change
            balance += pnl - entry_capital * fee_rate  # realised P&L minus exit fee
            trades.append(change)
            position = None

    equity_curve.append(balance)

# NOTE(review): a position still open on the last bar is neither closed nor
# marked to market, so its unrealised P&L is not reflected below.

# The curve has one point per bar; the slice also covers an empty df_test,
# where the seed value would otherwise make the lengths disagree.
df_test["Equity"] = equity_curve[: len(df_test)]
total_return = (balance / initial_capital - 1) * 100
running_peak = df_test["Equity"].cummax()
max_drawdown = ((running_peak - df_test["Equity"]) / running_peak).max() * 100
win_rate = sum(t > 0 for t in trades) / len(trades) * 100 if trades else 0
# Plot account equity over the backtest window against the starting capital.
plt.figure(figsize=(12, 6))
equity_times, equity_values = df_test["timestamp"], df_test["Equity"]
plt.plot(equity_times, equity_values, color="blue", label="Equity Curve")
# Dashed horizontal reference line at the starting capital.
plt.axhline(y=initial_capital, color="gray", alpha=0.7, linestyle="--")
plt.title("Backtest Equity Curve (Professional Edition Strategy)")
plt.ylabel("Capital (USDT)")
plt.xlabel("Time")
plt.xticks(rotation=45)  # slant the time labels
plt.legend()
plt.tight_layout()
plt.show()
用 時間序列交叉驗證 (Walk-forward) 訓練模型,逐段測試。
分成 n_splits 段,每段都用前面的資料訓練、用下一段測試,並計算每段的 accuracy;最後一次的模型與預測結果會被保留。
def train_ml_model_walkforward(X, y, df, n_splits=5):
    """Walk-forward validate a random forest and keep the last fold's results.

    For every split produced by ``TimeSeriesSplit`` a fresh
    ``RandomForestClassifier`` (200 trees, ``random_state=42``) is fitted on
    the training rows and scored on the test rows. Per-fold accuracy and the
    average are printed, and the final fold is passed to ``plot_predictions``.

    Args:
        X: Feature table, indexed positionally with ``.iloc``.
        y: Label series aligned with ``X``, indexed positionally with ``.iloc``.
        df: Frame forwarded unchanged to ``plot_predictions``.
        n_splits: Number of splits handed to ``TimeSeriesSplit``.

    Returns:
        Tuple ``(model, test_index, y_true, y_pred)`` from the last fold.
    """
    splitter = TimeSeriesSplit(n_splits=n_splits)
    scores = []
    last_model = last_test_index = last_y_true = last_y_pred = None

    for fold, (train_rows, test_rows) in enumerate(splitter.split(X)):
        forest = RandomForestClassifier(n_estimators=200, random_state=42)
        forest.fit(X.iloc[train_rows], y.iloc[train_rows])

        fold_truth = y.iloc[test_rows]
        fold_pred = forest.predict(X.iloc[test_rows])

        acc = accuracy_score(fold_truth, fold_pred)
        scores.append(acc)
        print(f"Fold {fold+1}/{n_splits} Accuracy: {acc:.4f}")

        # Overwritten every fold, so only the final fold survives the loop.
        last_model, last_test_index = forest, test_rows
        last_y_true, last_y_pred = fold_truth, fold_pred

    print(f"\nAverage Accuracy: {np.mean(scores):.4f}")
    plot_predictions(df, last_y_true, last_y_pred, last_test_index, "Predictions vs Actual (Latest Split)")
    return last_model, last_test_index, last_y_true, last_y_pred
用最新的 400 根 K 線資料預測市場走勢,顯示上漲/下跌的箭頭,
並呼叫 backtest_strategy() 進行最終模擬交易。
def predict_on_latest(df, model, n_points=400, features=None):
    """Predict on the most recent rows, plot the calls, then back-test them.

    Args:
        df: Frame providing the ``timestamp`` and ``close`` columns plus the
            feature columns.
        model: Fitted estimator exposing ``predict``.
        n_points: Number of trailing rows to use; capped at ``len(df)``.
        features: Feature column names. When omitted, the names recorded on
            the estimator (``model.feature_names_in_``) are used if present.

    Raises:
        ValueError: If no feature columns can be determined, or if there is
            no row to predict on.
    """
    if features is None:
        # df[None] would fail with an opaque KeyError; fall back to the
        # column names the estimator recorded when it was fitted.
        features = getattr(model, "feature_names_in_", None)
        if features is None:
            raise ValueError(
                "features must be given when the model does not expose feature_names_in_"
            )
        features = list(features)

    n_points = min(n_points, len(df))
    if n_points <= 0:
        raise ValueError("predict_on_latest needs at least one row to predict on")
    start_idx = len(df) - n_points

    window_time = df['timestamp'].iloc[start_idx:]
    window_close = df['close'].iloc[start_idx:]
    X_latest = df[features].iloc[start_idx:].reset_index(drop=True)

    # Realised direction: 1 when the next close is higher, otherwise 0. The
    # last row has no next close and therefore gets 0. Casting the fractional
    # return itself to int would truncate almost every value to 0.
    y_true = (df['close'].shift(-1) > df['close']).iloc[start_idx:].astype(int)
    y_pred = model.predict(X_latest)

    up_mask = y_pred == 1
    down_mask = y_pred == 0

    plt.figure(figsize=(12,6))
    plt.plot(window_time, window_close, label='Close Price', color='black')
    plt.scatter(window_time[up_mask], window_close[up_mask], label='Pred Up', color='green', marker='^')
    plt.scatter(window_time[down_mask], window_close[down_mask], label='Pred Down', color='red', marker='v')
    plt.title('Latest Predictions vs Actual (Full Model)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Positions of the window rows within df, passed on to the back-test.
    absolute_test_index = list(range(start_idx, len(df)))
    backtest_strategy(df, y_true=y_true, y_pred=y_pred, test_index=absolute_test_index)