目前徹底卡關:模型怎麼訓練都訓練不起來。我試了很多種其他的方法,也看到別人的教學都訓練得起來,但我訓練出來的結果都是什麼動作都不做(啥都不幹)。目前還找不出原因,有種想吐的感覺。
以下是目前的程式碼(還在 debug 中):
import os
import pandas as pd
from stable_baselines3 import A2C
import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs.trading_env import Positions
from stable_baselines3.common.monitor import Monitor
from PolygonIO.PolygonIODownloader import PolygonIODownloader
import talib
from tqdm import tqdm # 確保導入的是 tqdm 函數,而不是模組
import matplotlib.pyplot as plt
import plotly.graph_objs as go
# Names of the technical-indicator columns used as model features.
# The order is significant for any consumer that selects columns positionally.
FEATURE_COLUMNS = [
    # MACD line, signal line and histogram
    "macd", "macd_signal", "macd_hist",
    # Bollinger bands
    "boll_ub", "boll_lb",
    # Oscillators / directional index over a 30-bar window
    "rsi_30", "dx_30",
    # Simple moving averages of the close
    "close_30_sma", "close_60_sma",
    # Volatility, volume and trend-strength indicators
    "atr_14", "obv", "adx_14",
    # Exponential moving averages of the close
    "ema_20", "ema_50",
    # 14-bar oscillators
    "mfi_14", "willr_14", "cci_14",
    # Stochastic oscillator (%K, %D)
    "stoch_k", "stoch_d",
]
def extract_custom_features(df):
    """Return a copy of ``df`` with TA-Lib technical-indicator columns added.

    Args:
        df: DataFrame containing lowercase ``high``, ``low``, ``close`` and
            ``volume`` columns (these are the only columns read here).

    Returns:
        A new DataFrame with one column per entry of ``FEATURE_COLUMNS``
        appended. NaN values (e.g. indicator warm-up rows) are replaced by 0.
        The caller's frame is left unmodified.
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    df = df.copy()

    # TA-Lib only accepts double-precision inputs; an integer volume column
    # would otherwise be rejected.
    high = df["high"].astype("float64")
    low = df["low"].astype("float64")
    close = df["close"].astype("float64")
    volume = df["volume"].astype("float64")

    df["macd"], df["macd_signal"], df["macd_hist"] = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )

    # BBANDS returns (upper, middle, lower); the middle band is not a feature.
    upper_band, _middle_band, lower_band = talib.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
    )
    df["boll_ub"] = upper_band
    df["boll_lb"] = lower_band

    df["rsi_30"] = talib.RSI(close, timeperiod=30)
    # "dx_30" is listed in FEATURE_COLUMNS, so it has to be produced here.
    df["dx_30"] = talib.DX(high, low, close, timeperiod=30)
    df["close_30_sma"] = talib.SMA(close, timeperiod=30)
    df["close_60_sma"] = talib.SMA(close, timeperiod=60)
    df["atr_14"] = talib.ATR(high, low, close, timeperiod=14)
    df["obv"] = talib.OBV(close, volume)
    df["adx_14"] = talib.ADX(high, low, close, timeperiod=14)
    df["ema_20"] = talib.EMA(close, timeperiod=20)
    df["ema_50"] = talib.EMA(close, timeperiod=50)
    df["mfi_14"] = talib.MFI(high, low, close, volume, timeperiod=14)
    df["willr_14"] = talib.WILLR(high, low, close, timeperiod=14)
    df["cci_14"] = talib.CCI(high, low, close, timeperiod=14)
    df["stoch_k"], df["stoch_d"] = talib.STOCH(
        high,
        low,
        close,
        fastk_period=14,
        slowk_period=3,
        slowd_period=3,
    )

    return df.fillna(0)
def plot_html_with_annotations(trace_list, time_ind, annotations):
    """Render the given Plotly traces and annotations in one figure and show it.

    Args:
        trace_list: Iterable of Plotly traces added to the figure in order.
        time_ind: Sequence (list or pandas Series) of x-axis timestamps; its
            first element anchors the x-axis ticks. May be empty, in which
            case no tick anchor is set.
        annotations: List of Plotly annotation dicts applied to the layout.
    """
    fig = go.Figure()
    for trace in trace_list:
        fig.add_trace(trace)

    # Profit/loss labels attached to the trade markers.
    fig.update_layout(annotations=annotations)

    xaxis = {"type": "date"}
    if len(time_ind) > 0:
        # Positional access: ``time_ind[0]`` on a pandas Series is a *label*
        # lookup and raises KeyError when the index does not contain 0
        # (e.g. after slicing or filtering the frame).
        if hasattr(time_ind, "iloc"):
            xaxis["tick0"] = time_ind.iloc[0]
        else:
            xaxis["tick0"] = time_ind[0]
    xaxis["tickmode"] = "linear"
    xaxis["dtick"] = 86400000.0 * 80  # 80 days interval

    fig.update_layout(
        legend=dict(
            x=0,
            y=1,
            traceorder="normal",
            font=dict(family="sans-serif", size=15, color="black"),
            bgcolor="White",
            bordercolor="white",
            borderwidth=2,
        ),
        title={
            "text": "Price with Buy/Sell Signals and Profit/Loss",
            "y": 0.85,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        paper_bgcolor="rgba(1,1,0,0)",
        plot_bgcolor="rgba(1, 1, 0, 0)",
        yaxis_title="Price",
        xaxis=xaxis,
    )
    fig.show()
# Assuming the data `df`, `long_ticks`, `short_ticks`, and `profit_diffs` are available
def _build_profit_annotations(df, ticks, profit_diffs, format_profit):
    """Build one Plotly annotation per tick that has a non-zero profit."""
    known = len(profit_diffs)
    notes = []
    for tick in ticks:
        # Ticks beyond the recorded profits are treated as zero profit.
        gain = profit_diffs[tick] if tick < known else 0
        if gain == 0:
            continue
        notes.append(
            dict(
                x=df["date"].iloc[tick],
                y=df["Close"].iloc[tick],
                xref="x",
                yref="y",
                text=format_profit(gain),
                showarrow=True,
                arrowhead=2,
                ax=0,
                ay=-30,
                font=dict(color="green" if gain > 0 else "red"),
            )
        )
    return notes


def plot_price_with_actions_and_profits(df, long_ticks, short_ticks, profit_diffs):
    """Plot the price line, buy/sell markers and per-trade profit labels.

    Args:
        df: DataFrame with ``Close`` and ``date`` columns.
        long_ticks: Row positions in ``df`` where a long (buy) entry occurred.
        short_ticks: Row positions in ``df`` where a short (sell) entry occurred.
        profit_diffs: Sequence of per-row profit changes, indexed by the same
            row positions; zero entries get no label.
    """
    dates = df["date"]
    closes = df["Close"]

    traces = [
        # Price line
        go.Scatter(
            x=dates, y=closes, mode="lines", name="Price", line=dict(color="blue")
        ),
        # Buy signals (green triangle-up)
        go.Scatter(
            x=dates.iloc[long_ticks],
            y=closes.iloc[long_ticks],
            mode="markers",
            name="Buy Signal",
            marker=dict(symbol="triangle-up", size=10, color="green"),
        ),
        # Sell signals (red triangle-down)
        go.Scatter(
            x=dates.iloc[short_ticks],
            y=closes.iloc[short_ticks],
            mode="markers",
            name="Sell Signal",
            marker=dict(symbol="triangle-down", size=10, color="red"),
        ),
    ]

    # NOTE(review): long-entry labels are scaled by 1,000,000 while
    # short-entry labels are printed unscaled -- TODO confirm this is intended.
    annotations = _build_profit_annotations(
        df, long_ticks, profit_diffs, lambda profit: f"{profit*1000000:.2f}"
    )
    annotations += _build_profit_annotations(
        df, short_ticks, profit_diffs, lambda profit: f"{profit:.2f}"
    )

    plot_html_with_annotations(traces, dates, annotations)
def main():
    """Train an A2C agent on BTC/USD 15-minute bars and plot a backtest.

    Steps: download OHLCV data, split it into train/trade ranges, train A2C
    on a ``forex-v0`` environment built from the train split, replay the
    trained policy on the trade split, then plot price, entries and profit
    changes. Blocks on ``input()`` at the end so the plot stays reachable.
    """
    from finrl.meta.preprocessor.preprocessors import data_split

    DATA_START_DATE = "2017-01-01"
    DATA_END_DATE = "2024-10-09"
    TRAIN_START_DATE = "2024-03-01"
    TRAIN_END_DATE = "2024-10-09"
    TRADE_START_DATE = "2024-03-01"
    TRADE_END_DATE = "2024-10-09"
    TRAINED_MODEL_DIR = f"BTCUSD_15_minute_{TRAIN_START_DATE}_{TRAIN_END_DATE}_ForexEnv"
    TRAINED_MODEL_DIR = os.path.join("trained_models", TRAINED_MODEL_DIR)
    os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)

    # Fetch data from PolygonIO
    df_ohlcv = PolygonIODownloader().fetch_ohlcv(
        ["X:BTCUSD"], DATA_START_DATE, DATA_END_DATE, "minute", 15
    )
    df_ohlcv = df_ohlcv.rename(columns={"timestamp": "date", "ticker": "tic"})
    df = df_ohlcv.sort_values(["date", "tic"]).reset_index(drop=True)

    # Custom indicators are currently disabled. To enable them, call
    # extract_custom_features(df) here, before the columns are renamed
    # (it reads the lowercase column names).
    df = df.rename(
        columns={
            "close": "Close",
            "high": "High",
            "low": "Low",
            "open": "Open",
            "volume": "Volume",
        }
    )

    train = data_split(df, TRAIN_START_DATE, TRAIN_END_DATE)
    trade = data_split(df, TRADE_START_DATE, TRADE_END_DATE)
    print(f"Training Data Length: {len(train)}")
    print(f"Trading Data Length: {len(trade)}")

    window_size = 256

    # Train on the *train* split. Previously the environment was built from
    # the trade split, so `train` was computed but never used.
    train_env = Monitor(
        gym.make(
            "forex-v0",
            df=train,
            window_size=window_size,
            frame_bound=(window_size, len(train)),
        ),
        filename="monitor_log",
    )

    # NOTE(review): if the trained policy never changes its action, check the
    # scale of observations/rewards the env produces from raw BTC prices
    # (e.g. consider SB3's VecNormalize) -- TODO confirm what forex-v0 exposes.
    model = A2C("MlpPolicy", train_env, verbose=1)
    model.learn(total_timesteps=1000000)

    # Backtest on a fresh environment built from the trade split.
    env = gym.make(
        "forex-v0",
        df=trade,
        window_size=window_size,
        frame_bound=(window_size, len(trade)),
    )
    obs, info = env.reset()
    actions = []
    total_profits = []  # total_profit reported by the env after each step
    profit_diffs = []  # per-step change in total_profit (0 if action unchanged)
    total_reward = 0
    for step in tqdm(range(len(trade)), desc="Backtesting Progress"):
        action, _states = model.predict(obs)
        actions.append(action)
        obs, reward, done, truncated, info = env.step(action)
        total_reward += reward
        total_profits.append(info["total_profit"])
        # Only record a profit change on steps where the chosen action flipped.
        if step > 0 and actions[step] != actions[step - 1]:
            profit_diffs.append(total_profits[step] - total_profits[step - 1])
        else:
            profit_diffs.append(0)
        if done or truncated:
            break

    print(f"Total Reward: {total_reward}")
    print(f"Final Total Profit: {total_profits[-1]}")

    # Collect the indices at which the held position changed.
    position_history = env.get_wrapper_attr("get_position_history")()
    short_ticks = []
    long_ticks = []
    for tick in range(1, len(position_history)):
        current = position_history[tick]
        if current == position_history[tick - 1]:
            continue
        if current == Positions.Short:
            short_ticks.append(tick)
        elif current == Positions.Long:
            long_ticks.append(tick)

    # The plot looks profits up by tick index, but profit_diffs is indexed by
    # backtest step. Left-pad with zeros so the last step lines up with the
    # last history entry.
    # NOTE(review): assumes the position history ends at the final backtest
    # step (any extra leading entries being window padding / the initial
    # position) -- TODO confirm against get_position_history.
    lead = len(position_history) - len(profit_diffs)
    if lead > 0:
        profit_diffs = [0] * lead + profit_diffs

    plot_price_with_actions_and_profits(trade, long_ticks, short_ticks, profit_diffs)
    input("Press Enter to exit...")
if __name__ == "__main__":
main()