In the previous tutorial we used Stable Baselines 3 to build our RL agent and fed the resulting trades back into Backtrader for visualization. In this tutorial we go one step further with reinforcement learning for financial trading: we will use Stable Baselines 3 to implement an LSTM-based (Long Short-Term Memory) reinforcement learning agent and use Backtrader for backtesting. At every time step, our agent will observe the past 15 days of market data in order to make better-informed trading decisions.
In Google Colab, we first need to install the following libraries:
!pip install stable-baselines3
!pip install gymnasium
!pip install gymnasium[classic_control]
!pip install backtrader
!pip install yfinance
!pip install matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import backtrader as bt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common import env_checker
from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticPolicy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
We will use yfinance to download historical data for Apple (AAPL).
data = yf.download('AAPL', start='2015-01-01', end='2021-01-01')
data.reset_index(inplace=True)
data['Date'] = pd.to_datetime(data['Date'])  # make sure the Date column is a datetime type
data = data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
data.set_index('Date', inplace=True)
data['Open'] = data['Open'].astype('float32')
data['High'] = data['High'].astype('float32')
data['Low'] = data['Low'].astype('float32')
data['Close'] = data['Close'].astype('float32')
data['Volume'] = data['Volume'].astype('float32')
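Note: depending on the yfinance version installed in Colab, yf.download may return a DataFrame with MultiIndex columns even for a single ticker, which would break the column selection above. A hedged workaround (not part of the original tutorial) is to flatten the columns right after downloading:

# Hedged workaround: flatten MultiIndex columns that newer yfinance versions may return.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)  # keep only the field names (Open, High, ...)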
In this section we modify the trading environment so that at every time step the agent observes the past 15 days of data; each observation therefore has shape (15, 5).
class TradingEnv(gym.Env):
"""自訂的交易環境,用於強化學習模型訓練,代理觀察過去 15 天的數據"""
def __init__(self, data, window_size=15, cash=10000, commission=0.001):
super(TradingEnv, self).__init__()
self.data = data.reset_index()
self.cash = cash # 初始現金
self.initial_cash = cash # 紀錄初始現金
self.commission = commission # 交易手續費
self.window_size = window_size # 觀察窗口大小
self.current_step = self.window_size # 開始位置
# 定義觀測空間和行動空間
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(self.window_size, 5), dtype=np.float32
)
self.action_space = spaces.Discrete(3) # 0: 持有, 1: 買入, 2: 賣出
# 初始化帳戶資訊
self.position = 0 # 持有的股票數量
self.net_worth = self.cash # 資產淨值
self.prev_net_worth = self.cash # 前一步的資產淨值
# 紀錄交易訊號
self.trades = []
def _get_obs(self):
"""獲取當前的觀測值"""
obs = self.data.loc[self.current_step - self.window_size:self.current_step - 1, ['Open', 'High', 'Low', 'Close', 'Volume']].values
return obs
def reset(self, *, seed=None, options=None):
"""重置環境到初始狀態"""
super().reset(seed=seed)
self.current_step = self.window_size
self.position = 0
self.cash = self.initial_cash
self.net_worth = self.cash
self.prev_net_worth = self.cash
self.trades = [] # 重置交易紀錄
obs = self._get_obs()
info = {}
return obs, info
def step(self, action):
"""執行一個行動,並返回新的狀態和獎勵"""
current_price = self.data.loc[self.current_step, 'Close']
# 記錄交易訊號
date = self.data.loc[self.current_step, 'Date']
self.trades.append({'Date': date, 'Action': action})
# 計算交易手續費
commission = 0
if action == 1: # 買入
# 計算可買入的最大股數
max_shares = int(self.cash / (current_price * (1 + self.commission)))
if max_shares > 0:
# 更新帳戶餘額和持倉
cost = max_shares * current_price * (1 + self.commission)
self.cash -= cost
self.position += max_shares
commission = cost * self.commission
elif action == 2: # 賣出
if self.position > 0:
# 更新帳戶餘額和持倉
revenue = self.position * current_price * (1 - self.commission)
self.cash += revenue
commission = self.position * current_price * self.commission
self.position = 0
# action == 0 表示持有,不執行任何操作
self.current_step += 1
# 更新資產淨值
self.net_worth = self.cash + self.position * current_price
# 計算獎勵
reward = self.net_worth - self.prev_net_worth - commission
self.prev_net_worth = self.net_worth
# 判斷是否終止
if self.current_step >= len(self.data):
terminated = True
else:
terminated = False
truncated = False
obs = self._get_obs()
info = {}
return obs, reward, terminated, truncated, info
def render(self):
"""渲染環境(此處未實作)"""
pass
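Before training, it is worth a quick sanity check that the environment really produces observations of the expected shape. A minimal check, assuming the data DataFrame prepared above (not part of the original tutorial):

# Quick sanity check of the observation shape and dtype.
probe_env = TradingEnv(data)
obs, info = probe_env.reset()
print(obs.shape)  # expected: (15, 5) -> window_size days x OHLCV features
print(obs.dtype)  # float32, matching the observation_space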
The main changes to the environment:
- window_size: a new parameter specifying the size of the observation window, set to 15 here.
- observation_space: the shape of the observation space becomes (window_size, 5).
- _get_obs method: returns the OHLCV rows of the past window_size days ending at the current step.
- current_step: starts at window_size, so there is always enough history for a full observation.
- reset and step methods: updated to manage current_step and keep the index within the range of the data.

To let the model handle these higher-dimensional observations ((15, 5)), we need a custom feature extractor that uses an LSTM to process the time-series data.
class CustomLSTMFeatureExtractor(BaseFeaturesExtractor):
    """Custom feature extractor that uses an LSTM to process time-series data"""

    def __init__(self, observation_space: spaces.Box, features_dim: int = 128):
        super(CustomLSTMFeatureExtractor, self).__init__(observation_space, features_dim)
        self.lstm = nn.LSTM(input_size=5, hidden_size=features_dim, batch_first=True)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # observations shape: (batch_size, window_size, 5)
        lstm_output, (h_n, c_n) = self.lstm(observations)
        # Use the hidden state of the last time step as the feature vector
        return h_n[-1]
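To see what the extractor returns, a small hedged check (not from the original tutorial) that passes a random batch through it and confirms the output has shape (batch_size, features_dim):

# Hedged check: feed a dummy batch of observations through the LSTM extractor.
dummy_space = spaces.Box(low=-np.inf, high=np.inf, shape=(15, 5), dtype=np.float32)
extractor = CustomLSTMFeatureExtractor(dummy_space, features_dim=128)
dummy_obs = torch.randn(4, 15, 5)   # (batch_size, window_size, features)
print(extractor(dummy_obs).shape)   # expected: torch.Size([4, 128])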
In the forward pass, the observations are processed by nn.LSTM and the hidden state of the last time step is returned as the feature vector. Next, we create the environment instance, check it with the built-in environment checker, and wrap it in a vectorized environment:
env = TradingEnv(data)
env_checker.check_env(env)
env = DummyVecEnv([lambda: env])
We now define a custom policy network that uses our feature extractor.
policy_kwargs = dict(
features_extractor_class=CustomLSTMFeatureExtractor,
features_extractor_kwargs=dict(features_dim=128),
)
model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1)
model.learn(total_timesteps=10000)
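Training for 10,000 timesteps can take a while in Colab, so it may be worth persisting the trained agent. A sketch using the standard Stable Baselines 3 save/load API (the file name here is just an example):

# Optional: save the trained agent and reload it later.
model.save('ppo_lstm_trading')                 # writes ppo_lstm_trading.zip
model = PPO.load('ppo_lstm_trading', env=env)  # reload it in the same session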
env.envs[0].reset()
for i in range(env.envs[0].window_size, len(data)):
    obs = env.envs[0]._get_obs()
    obs = obs[np.newaxis, :]  # add a batch dimension for model.predict
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, truncated, info = env.envs[0].step(action[0])
    if dones:
        break
We set deterministic=True so that model.predict returns the policy's deterministic action at every step.
# Get the trade log from the environment
trades = pd.DataFrame(env.envs[0].trades)
# Merge the trade log with the original data
data.reset_index(inplace=True)
merged_data = pd.merge(data, trades, on='Date', how='left')
merged_data['Action'].fillna(0, inplace=True)
# Make sure the Date column is a datetime type
merged_data['Date'] = pd.to_datetime(merged_data['Date'])
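Before replaying the signals in Backtrader, it can be useful to check how many buy, sell, and hold signals the agent actually produced. A quick check (not in the original tutorial):

# 0 = hold (including days without a recorded signal), 1 = buy, 2 = sell
print(merged_data['Action'].value_counts())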
class RLStrategy(bt.Strategy):
    """Custom strategy that replays the agent's buy/sell signals on the chart"""

    def __init__(self):
        self.dataclose = self.datas[0].close

    def next(self):
        # Execute trades according to the recorded signals
        idx = len(self) - 1  # current bar index
        action = merged_data.loc[idx, 'Action']
        if action == 1 and self.position.size == 0:
            # buy
            self.buy(size=100)
        elif action == 2 and self.position.size > 0:
            # sell
            self.sell(size=self.position.size)
cerebro = bt.Cerebro()

# Specify the datetime column when adding the data feed
data_bt = bt.feeds.PandasData(
    dataname=merged_data,
    datetime='Date',
    open='Open',
    high='High',
    low='Low',
    close='Close',
    volume='Volume',
    openinterest=-1,
    timeframe=bt.TimeFrame.Days
)
cerebro.adddata(data_bt)
cerebro.addstrategy(RLStrategy)
cerebro.broker.setcash(10000)
cerebro.broker.setcommission(commission=0.001)

print('Initial portfolio value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final portfolio value: %.2f' % cerebro.broker.getvalue())
The final value shows the portfolio grew quite a bit:
# Plot the chart
%matplotlib inline
plt.rcParams['figure.figsize'] = [20, 16]
plt.rcParams.update({'font.size': 12})
img = cerebro.plot(iplot=False)
img[0][0].savefig('backtrader_ppo_lstm.png')
From the Backtrader chart we can see the model's buy and sell points during the test period, as well as the change in net asset value. This gives us an intuitive view of the model's trading decisions and performance.
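If you also want numeric performance statistics rather than only the chart and the final value, Backtrader's built-in analyzers can be attached before cerebro.run(). A hedged sketch, not part of the original setup:

# Sketch: attach analyzers before cerebro.run() to get summary statistics.
cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
results = cerebro.run()
print('Sharpe ratio:', results[0].analyzers.sharpe.get_analysis())
print('Max drawdown (%):', results[0].analyzers.drawdown.get_analysis()['max']['drawdown'])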
Below is the complete code, which you can run directly in Google Colab.
# Install the required libraries
!pip install stable-baselines3
!pip install gymnasium
!pip install gymnasium[classic_control]
!pip install backtrader
!pip install yfinance
!pip install matplotlib
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import backtrader as bt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common import env_checker
from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticPolicy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
# Download the data
data = yf.download('AAPL', start='2015-01-01', end='2021-01-01')
data.reset_index(inplace=True)
data['Date'] = pd.to_datetime(data['Date'])  # make sure the Date column is a datetime type
data = data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
data.set_index('Date', inplace=True)
# Data preprocessing
data['Open'] = data['Open'].astype('float32')
data['High'] = data['High'].astype('float32')
data['Low'] = data['Low'].astype('float32')
data['Close'] = data['Close'].astype('float32')
data['Volume'] = data['Volume'].astype('float32')
# Define the trading environment
class TradingEnv(gym.Env):
    """Custom trading environment for RL training; the agent observes the past 15 days of data"""

    def __init__(self, data, window_size=15, cash=10000, commission=0.001):
        super(TradingEnv, self).__init__()
        self.data = data.reset_index()
        self.cash = cash                      # initial cash
        self.initial_cash = cash              # remember the initial cash
        self.commission = commission          # trading commission
        self.window_size = window_size        # observation window size
        self.current_step = self.window_size  # starting position

        # Define observation and action spaces
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.window_size, 5), dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)  # 0: hold, 1: buy, 2: sell

        # Initialize account state
        self.position = 0              # number of shares held
        self.net_worth = self.cash     # net asset value
        self.prev_net_worth = self.cash  # net asset value at the previous step

        # Trade log
        self.trades = []

    def _get_obs(self):
        """Return the current observation"""
        obs = self.data.loc[self.current_step - self.window_size:self.current_step - 1,
                            ['Open', 'High', 'Low', 'Close', 'Volume']].values
        return obs

    def reset(self, *, seed=None, options=None):
        """Reset the environment to its initial state"""
        super().reset(seed=seed)
        self.current_step = self.window_size
        self.position = 0
        self.cash = self.initial_cash
        self.net_worth = self.cash
        self.prev_net_worth = self.cash
        self.trades = []  # reset the trade log
        obs = self._get_obs()
        info = {}
        return obs, info

    def step(self, action):
        """Execute one action and return the new state and reward"""
        current_price = self.data.loc[self.current_step, 'Close']

        # Record the trading signal
        date = self.data.loc[self.current_step, 'Date']
        self.trades.append({'Date': date, 'Action': action})

        # Track the commission paid this step
        commission = 0
        if action == 1:  # buy
            # Maximum number of shares we can afford
            max_shares = int(self.cash / (current_price * (1 + self.commission)))
            if max_shares > 0:
                # Update cash balance and position
                cost = max_shares * current_price * (1 + self.commission)
                self.cash -= cost
                self.position += max_shares
                commission = cost * self.commission
        elif action == 2:  # sell
            if self.position > 0:
                # Update cash balance and position
                revenue = self.position * current_price * (1 - self.commission)
                self.cash += revenue
                commission = self.position * current_price * self.commission
                self.position = 0
        # action == 0 means hold; do nothing

        self.current_step += 1

        # Update net worth
        self.net_worth = self.cash + self.position * current_price

        # Compute the reward
        reward = self.net_worth - self.prev_net_worth - commission
        self.prev_net_worth = self.net_worth

        # Check whether the episode is over
        if self.current_step >= len(self.data):
            terminated = True
        else:
            terminated = False
        truncated = False

        obs = self._get_obs()
        info = {}
        return obs, reward, terminated, truncated, info

    def render(self):
        """Render the environment (not implemented)"""
        pass
# Define the custom feature extractor
class CustomLSTMFeatureExtractor(BaseFeaturesExtractor):
    """Custom feature extractor that uses an LSTM to process time-series data"""

    def __init__(self, observation_space: spaces.Box, features_dim: int = 128):
        super(CustomLSTMFeatureExtractor, self).__init__(observation_space, features_dim)
        self.lstm = nn.LSTM(input_size=5, hidden_size=features_dim, batch_first=True)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # observations shape: (batch_size, window_size, 5)
        lstm_output, (h_n, c_n) = self.lstm(observations)
        # Use the hidden state of the last time step as the feature vector
        return h_n[-1]
# Create the environment instance
env = TradingEnv(data)
# Check the environment
env_checker.check_env(env)
# Wrap the environment
env = DummyVecEnv([lambda: env])
# Define the policy network
policy_kwargs = dict(
    features_extractor_class=CustomLSTMFeatureExtractor,
    features_extractor_kwargs=dict(features_dim=128),
)
# Train the model
model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1)
model.learn(total_timesteps=10000)
# Test the model and collect trading signals
env.envs[0].reset()
for i in range(env.envs[0].window_size, len(data)):
    obs = env.envs[0]._get_obs()
    obs = obs[np.newaxis, :]  # add a batch dimension
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, truncated, info = env.envs[0].step(action[0])
    if dones:
        break
# Get the trade log from the environment
trades = pd.DataFrame(env.envs[0].trades)
# Merge the trade log with the original data
data.reset_index(inplace=True)
merged_data = pd.merge(data, trades, on='Date', how='left')
merged_data['Action'].fillna(0, inplace=True)
# Make sure the Date column is a datetime type
merged_data['Date'] = pd.to_datetime(merged_data['Date'])
# Show the buy/sell points in Backtrader
class RLStrategy(bt.Strategy):
    """Custom strategy that replays the agent's buy/sell signals on the chart"""

    def __init__(self):
        self.dataclose = self.datas[0].close

    def next(self):
        # Execute trades according to the recorded signals
        idx = len(self) - 1  # current bar index
        action = merged_data.loc[idx, 'Action']
        if action == 1 and self.position.size == 0:
            # buy
            self.buy(size=100)
        elif action == 2 and self.position.size > 0:
            # sell
            self.sell(size=self.position.size)
# Set up Backtrader
cerebro = bt.Cerebro()
# Specify the datetime column when adding the data feed
data_bt = bt.feeds.PandasData(
    dataname=merged_data,
    datetime='Date',
    open='Open',
    high='High',
    low='Low',
    close='Close',
    volume='Volume',
    openinterest=-1,
    timeframe=bt.TimeFrame.Days
)
cerebro.adddata(data_bt)
cerebro.addstrategy(RLStrategy)
cerebro.broker.setcash(10000)
cerebro.broker.setcommission(commission=0.001)
print('Initial portfolio value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final portfolio value: %.2f' % cerebro.broker.getvalue())
# Plot the chart
%matplotlib inline
cerebro.plot(iplot=True, volume=False)
In this tutorial, we extended the previous agent with a 15-day observation window, implemented a custom LSTM feature extractor for the PPO policy, and visualized the resulting trades in Backtrader. As a next step, try different values of window_size, for example 5 or 30 days, and observe the effect on model performance. Hopefully this tutorial gives you a deeper understanding of how to develop quantitative trading strategies with LSTM-based reinforcement learning, along with hands-on experience using Stable Baselines 3 and Backtrader.
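For example, rerunning the experiment with a 30-day window only requires changing the environment constructor; the LSTM extractor needs no changes, since its input_size of 5 refers to the per-day features rather than the window length. A sketch under that assumption:

# Sketch: a 30-day observation window; the rest of the pipeline stays the same.
env = TradingEnv(data, window_size=30)
env_checker.check_env(env)
env = DummyVecEnv([lambda: env])
model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1)
model.learn(total_timesteps=10000)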