Yesterday I tried StockTradingEnvCashpenalty. After some modifications it ran fine, but it learned very poorly: the agent usually converged to doing nothing at all, just giving up. After a long stretch of fruitless attempts, today I switched back to StockTradingEnv, the environment used in the US-stock example.
To my surprise, the results were still terrible: the agent barely learned anything. The only behavior it did pick up was to buy on the first day and then never move again, holding until the last day, which left me dumbfounded. So I began to suspect that the logic of the trading environments FinRL ships (StockTradingEnv, StockTradingEnvCashpenalty) has some problems. Up to now I have been using them as black boxes without dissecting their internals, and with only a few days left in this series I cannot afford a deep dive, so I went online and read a lot of other people's write-ups instead.
In the end I still suspect the reward mechanism of StockTradingEnv and StockTradingEnvCashpenalty. Leaving aside how good or bad the performance is, the agent should at least be taking actions regularly rather than learning to do nothing at all. Moreover, I even fed train and trade exactly the same data and it still behaves this way, so some link in the learning loop must be broken. Tuning the DRL hyperparameters has had no effect whatsoever, which leaves the environment as my prime suspect.
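For context, FinRL's StockTradingEnv computes its per-step reward, as far as I can tell from skimming the source, as the change in total asset value multiplied by reward_scaling. Here is a toy illustration of that rule as I understand it (my own sketch with made-up numbers, not FinRL's actual code; the internals may differ between versions):

import numpy as np

# Toy illustration of the reward rule (my reading of FinRL's source):
# reward = change in total asset value over the step, times reward_scaling.
reward_scaling = 1e-4
cash = 940_000.0                      # cash left after buying 1 BTC at 60k
holdings = np.array([1.0])            # shares (here: BTC) currently held
prices = np.array([60_000.0])         # price at the start of the step
new_prices = np.array([60_600.0])     # price at the end of the step (+1%)

begin_total_asset = cash + (prices * holdings).sum()
# ... in the real env, buy/sell actions execute here, charging
# buy_cost_pct / sell_cost_pct on every trade ...
end_total_asset = cash + (new_prices * holdings).sum()
reward = (end_total_asset - begin_total_asset) * reward_scaling
print(reward)  # 0.06: holding through an uptick is rewarded with no new costs

If that reading is right, buying early and holding through a mostly rising window collects positive reward every step while never paying transaction costs again, which is at least consistent with the "buy on day one, then freeze" policy I keep getting.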
Below is today's code. After training, plotting the backtest results shows the policy is essentially: buy on the first day, then hold until the last day.
def main():
DATA_START_DATE = "2017-01-01"
DATA_END_DATE = "2024-10-09"
INIT_AMOUNT = 1000000
TRAIN_START_DATE = "2024-03-01"
TRAIN_END_DATE = "2024-10-09"
TRADE_START_DATE = "2024-03-01"
TRADE_END_DATE = "2024-10-09"
TRAINED_MODEL_DIR = (
f"BTCUSD_15_minute_{TRAIN_START_DATE}_{TRAIN_END_DATE}_StockTradingEnv"
)
TRAINED_MODEL_DIR = os.path.join("trained_models", TRAINED_MODEL_DIR)
os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)
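    # Step 1: Fetch and Prepare Data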
df_ohlcv = PolygonIODownloader().fetch_ohlcv(
["X:BTCUSD"], DATA_START_DATE, DATA_END_DATE, "minute", 15
)
df_ohlcv = df_ohlcv.rename(columns={"timestamp": "date", "ticker": "tic"})
df = df_ohlcv.sort_values(["date", "tic"]).reset_index(drop=True)
df = extract_custom_features(df)
train = data_split(df, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(df, TRADE_START_DATE, TRADE_END_DATE)
print(f"Training Data Length: {len(train)}")
print(f"Trading Data Length: {len(trade)}")
# Step 2: Define Model Configurations
models_info = {
A2C: {
"params": {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002},
"total_timesteps": 50000,
"save_path": os.path.join(TRAINED_MODEL_DIR, "agent_a2c.zip"),
},
}
# Step 3: Train DRL Models
# Initialize StockTradingEnv for training
stock_dimension = len(train.tic.unique())
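    # FinRL's StockTradingEnv flattens its state as (to my understanding):
    # [cash] + [close price per tic] + [shares held per tic] + [each indicator per tic],
    # which gives 1 + 2 * stock_dim + n_features * stock_dim entries.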
state_space = 1 + 2 * stock_dimension + len(FEATURE_COLUMNS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension
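    # These kwargs follow the FinRL US-stock example: hmax caps the number of
    # shares traded per action, and reward_scaling multiplies the per-step
    # change in total assets (see the sketch above).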
env_kwargs = {
"hmax": 100,
"initial_amount": INIT_AMOUNT,
"num_stock_shares": num_stock_shares,
"buy_cost_pct": buy_cost_list,
"sell_cost_pct": sell_cost_list,
"state_space": state_space,
"stock_dim": stock_dimension,
"tech_indicator_list": FEATURE_COLUMNS,
"action_space": stock_dimension,
"reward_scaling": 1e-4,
}
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
# Train models
trained_models = train_drl(e_train_gym, models_info)
# Step 4: Backtest Models
# Initialize trading environment
e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
# Backtest trained models
backtest_results = backtest_drl(e_trade_gym, trained_models)
trade_dates = trade["date"].drop_duplicates().sort_values()
plot_cumulative_returns(
backtest_results,
trade_dates,
)
# Optional: Save backtest results
print("Backtest Results:")
for model_name, result in backtest_results.items():
print(f"{model_name}:")
print(result["daily_return"].head())
# Get the best performing model based on final cumulative return
best_model_name = max(
backtest_results,
key=lambda x: (backtest_results[x]["daily_return"] + 1).cumprod().iloc[-1],
)
print(f"Best performing model: {best_model_name}")
trade_dates = trade["date"].drop_duplicates().sort_values()
plot_cumulative_returns_withbuysell(
backtest_results, trade_dates, target_model_names=[best_model_name]
)
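Before blaming the environment outright, one more debugging step worth noting: drive the same env with a purely random policy and check that the reward signal actually moves. This is a standalone sketch of my own (it reuses train, env_kwargs, and stock_dimension from main() above), and depending on the FinRL version, reset()/step() may follow the old gym API (4-tuple from step) instead of the gymnasium 5-tuple shown here:

import numpy as np

# Sanity check (my own sketch): feed random actions into the training env and
# confirm rewards vary before tuning DRL hyperparameters any further.
sanity_env = StockTradingEnv(df=train, **env_kwargs)
obs, _ = sanity_env.reset()
total_reward = 0.0
for _ in range(1000):
    action = np.random.uniform(-1, 1, size=stock_dimension)
    obs, reward, done, truncated, _ = sanity_env.step(action)
    total_reward += reward
    if done or truncated:
        break
print(f"Random-policy cumulative reward: {total_reward}")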
So I have now decided to set FinRL aside and use the ready-made Env from gym_anytrading instead. Never mind performance for the moment; I first need the agent to learn anything at all:
About model.learn(total_timesteps=10000): the total_timesteps for this first run is deliberately tiny, since I just want to see whether training produces anything at all; for a real run it will have to be increased.
import os
import pandas as pd
from stable_baselines3 import A2C
import gymnasium as gym
import gym_anytrading
from PolygonIO.PolygonIODownloader import PolygonIODownloader
import talib
from tqdm import tqdm  # make sure this imports the tqdm function, not the module
FEATURE_COLUMNS = [
"macd",
"macd_signal",
"macd_hist",
"boll_ub",
"boll_lb",
"rsi_30",
"dx_30",
"close_30_sma",
"close_60_sma",
"atr_14",
"obv",
"adx_14",
"ema_20",
"ema_50",
"mfi_14",
"willr_14",
"cci_14",
"stoch_k",
"stoch_d",
]
def extract_custom_features(df):
# Add technical indicators
df["macd"], df["macd_signal"], df["macd_hist"] = talib.MACD(
df["close"], fastperiod=12, slowperiod=26, signalperiod=9
)
df["boll_ub"], df["boll_lb"], _ = talib.BBANDS(
df["close"], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
)
df["rsi_30"] = talib.RSI(df["close"], timeperiod=30)
df["close_30_sma"] = talib.SMA(df["close"], timeperiod=30)
df["close_60_sma"] = talib.SMA(df["close"], timeperiod=60)
df["atr_14"] = talib.ATR(df["high"], df["low"], df["close"], timeperiod=14)
df["obv"] = talib.OBV(df["close"], df["volume"])
df["adx_14"] = talib.ADX(df["high"], df["low"], df["close"], timeperiod=14)
df["ema_20"] = talib.EMA(df["close"], timeperiod=20)
df["ema_50"] = talib.EMA(df["close"], timeperiod=50)
df["mfi_14"] = talib.MFI(
df["high"], df["low"], df["close"], df["volume"], timeperiod=14
)
df["willr_14"] = talib.WILLR(df["high"], df["low"], df["close"], timeperiod=14)
df["cci_14"] = talib.CCI(df["high"], df["low"], df["close"], timeperiod=14)
df["stoch_k"], df["stoch_d"] = talib.STOCH(
df["high"],
df["low"],
df["close"],
fastk_period=14,
slowk_period=3,
slowd_period=3,
)
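    # Note (my addition): fillna(0) papers over the TA-Lib warm-up NaNs with
    # zeros; dropping the first ~60 warm-up rows instead might be cleaner.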
df = df.fillna(0)
return df
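# Helper carried over from the FinRL pipeline; not used in this script yet.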
def compute_daily_return(df):
df["daily_return"] = df["Close"].pct_change().fillna(0)
return df
def main():
DATA_START_DATE = "2017-01-01"
DATA_END_DATE = "2024-10-09"
TRAIN_START_DATE = "2024-03-01"
TRAIN_END_DATE = "2024-10-09"
TRAINED_MODEL_DIR = f"BTCUSD_15_minute_{TRAIN_START_DATE}_{TRAIN_END_DATE}_ForexEnv"
TRAINED_MODEL_DIR = os.path.join("trained_models", TRAINED_MODEL_DIR)
os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)
# Fetch data from PolygonIO
df_ohlcv = PolygonIODownloader().fetch_ohlcv(
["X:BTCUSD"], DATA_START_DATE, DATA_END_DATE, "minute", 15
)
df_ohlcv = df_ohlcv.rename(columns={"timestamp": "date", "ticker": "tic"})
df = df_ohlcv.sort_values(["date", "tic"]).reset_index(drop=True)
# Adding custom features to the dataset
df = extract_custom_features(df)
    df = df.rename(
        columns={
            "close": "Close",  # gym_anytrading expects capitalized OHLCV names
            "high": "High",
            "low": "Low",
            "volume": "Volume",
        }
    )
# Define the Forex environment using gym-anytrading's ForexEnv
window_size = 10 # Set appropriate window size
frame_bound = (window_size, len(df)) # Set frame boundaries based on data length
# Initialize Forex environment
env = gym.make("forex-v0", df=df, window_size=window_size, frame_bound=frame_bound)
# Initialize and train A2C model
model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)
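    # My addition: persist the model, since TRAINED_MODEL_DIR is created
    # above but otherwise never used.
    model.save(os.path.join(TRAINED_MODEL_DIR, "agent_a2c_anytrading"))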
    # Backtest the trained model
obs, info = env.reset()
actions = []
for step in tqdm(range(len(df)), desc="Backtesting Progress"):
action, _states = model.predict(obs)
actions.append(action)
obs, reward, done, truncated, info = env.step(action)
if done or truncated:
break
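    # gym_anytrading accumulates run statistics in info; in the versions I've
    # used it reports keys like 'total_reward' and 'total_profit' (assumption).
    print(info)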
    # Render the full backtest; gym.make wraps the env, so go through unwrapped
    env.unwrapped.render_all()
if __name__ == "__main__":
main()
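One caveat I only noticed while wiring this up: as far as I can tell, gym_anytrading's default envs build their observations from the 'Close' column alone (price plus its first difference), so all the TA-Lib features computed above are ignored by the stock forex-v0. To make the model actually see them, I would have to subclass the env and override _process_data, roughly like this (my own untested sketch; FeatureForexEnv is a hypothetical name):

import numpy as np
from gym_anytrading.envs import ForexEnv

class FeatureForexEnv(ForexEnv):
    """Hypothetical subclass that feeds the custom TA-Lib features into the
    observation instead of just Close price + diff."""

    def _process_data(self):
        start = self.frame_bound[0] - self.window_size
        end = self.frame_bound[1]
        prices = self.df["Close"].to_numpy()[start:end]
        features = self.df[FEATURE_COLUMNS].to_numpy()[start:end]
        # Each observation row becomes [price, feature_1, ..., feature_n]
        signal_features = np.column_stack((prices, features))
        return prices, signal_features

If I go this route, env = FeatureForexEnv(df=df, window_size=window_size, frame_bound=frame_bound) would replace the gym.make("forex-v0", ...) call above.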