Day 28 - 加密貨幣自動交易 (3/5) - iT 邦幫忙::一起幫忙解決難題，拯救 IT 人的一天

2024 iThome 鐵人賽

DAY 28

AI/ ML & Data

自動交易程式探索系列第 28 篇

Day 28 - 加密貨幣自動交易 (3/5)

16th鐵人賽

jjchen1

團隊北投溫泉公園的蛞蝓觀察小隊

2024-10-12 22:25:05

254 瀏覽

分享至

目前徹底卡關：模型怎麼訓練都訓練不起來，也試了很多種其他方法。別人教學中的模型都訓練得起來，但我訓練出來的結果卻是什麼動作都不做（完全不交易），目前還找不出原因，有種想吐的感覺。

以下是目前的程式碼，還在 debug 中：

import os
import pandas as pd
from stable_baselines3 import A2C
import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs.trading_env import Positions
from stable_baselines3.common.monitor import Monitor
from PolygonIO.PolygonIODownloader import PolygonIODownloader
import talib
from tqdm import tqdm  # 確保導入的是 tqdm 函數，而不是模組
import matplotlib.pyplot as plt
import plotly.graph_objs as go

# Names of the technical-indicator feature columns used by this script.
# NOTE(review): nothing in the visible code reads this list — confirm where
# it is consumed before renaming or reordering entries.
FEATURE_COLUMNS = [
    "macd",
    "macd_signal",
    "macd_hist",
    "boll_ub",
    "boll_lb",
    "rsi_30",
    "dx_30",
    "close_30_sma",
    "close_60_sma",
    "atr_14",
    "obv",
    "adx_14",
    "ema_20",
    "ema_50",
    "mfi_14",
    "willr_14",
    "cci_14",
    "stoch_k",
    "stoch_d",
]


def extract_custom_features(df):
    """Return a copy of ``df`` with TA-Lib technical-indicator columns added.

    Parameters:
    - df: DataFrame with lowercase "high", "low", "close" and "volume"
      columns.

    Returns:
    A new DataFrame containing the original columns plus one column per
    indicator computed below. Every NaN in the frame is replaced with 0
    before returning. The input frame is left untouched.
    """
    # Work on a copy so the caller's frame is not half-modified in place.
    df = df.copy()

    # TA-Lib only accepts float64 ("double") input; integer volume or price
    # columns would otherwise raise inside the C wrapper.
    high = df["high"].astype("float64")
    low = df["low"].astype("float64")
    close = df["close"].astype("float64")
    volume = df["volume"].astype("float64")

    df["macd"], df["macd_signal"], df["macd_hist"] = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )

    # BBANDS returns (upper, middle, lower); the middle band is not stored.
    boll_upper, _boll_middle, boll_lower = talib.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
    )
    df["boll_ub"] = boll_upper
    df["boll_lb"] = boll_lower

    df["rsi_30"] = talib.RSI(close, timeperiod=30)
    # "dx_30" is listed in FEATURE_COLUMNS, so it has to be produced here.
    df["dx_30"] = talib.DX(high, low, close, timeperiod=30)
    df["close_30_sma"] = talib.SMA(close, timeperiod=30)
    df["close_60_sma"] = talib.SMA(close, timeperiod=60)
    df["atr_14"] = talib.ATR(high, low, close, timeperiod=14)
    df["obv"] = talib.OBV(close, volume)
    df["adx_14"] = talib.ADX(high, low, close, timeperiod=14)
    df["ema_20"] = talib.EMA(close, timeperiod=20)
    df["ema_50"] = talib.EMA(close, timeperiod=50)
    df["mfi_14"] = talib.MFI(high, low, close, volume, timeperiod=14)
    df["willr_14"] = talib.WILLR(high, low, close, timeperiod=14)
    df["cci_14"] = talib.CCI(high, low, close, timeperiod=14)
    df["stoch_k"], df["stoch_d"] = talib.STOCH(
        high,
        low,
        close,
        fastk_period=14,
        slowk_period=3,
        slowd_period=3,
    )

    # NOTE(review): 0 is not a neutral value for price-scaled indicators
    # (SMA/EMA/Bollinger); consider dropping the leading NaN rows instead.
    return df.fillna(0)


def plot_html_with_annotations(trace_list, time_ind, annotations):
    """Assemble a Plotly figure from traces and annotations, then show it.

    Parameters:
    - trace_list: iterable of Plotly traces added to the figure in order.
    - time_ind: x-axis dates (pandas Series or plain sequence); its first
      element anchors the x-axis ticks. May be empty.
    - annotations: list of Plotly annotation dicts placed on the layout.
    """
    fig = go.Figure()
    for trace in trace_list:
        fig.add_trace(trace)

    ms_per_day = 86400000.0
    xaxis = {
        "type": "date",
        "tickmode": "linear",
        "dtick": ms_per_day * 80,  # 80 days interval
    }
    if len(time_ind) > 0:
        # Take the first element by position: ``series[0]`` is a label lookup
        # and fails when the Series index does not contain the label 0.
        if hasattr(time_ind, "iloc"):
            xaxis["tick0"] = time_ind.iloc[0]
        else:
            xaxis["tick0"] = time_ind[0]

    fig.update_layout(
        # Profit/loss labels supplied by the caller.
        annotations=annotations,
        legend=dict(
            x=0,
            y=1,
            traceorder="normal",
            font=dict(family="sans-serif", size=15, color="black"),
            bgcolor="White",
            bordercolor="white",
            borderwidth=2,
        ),
        title={
            "text": "Price with Buy/Sell Signals and Profit/Loss",
            "y": 0.85,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        paper_bgcolor="rgba(1,1,0,0)",
        plot_bgcolor="rgba(1, 1, 0, 0)",
        yaxis_title="Price",
        xaxis=xaxis,
    )

    fig.show()


# Assuming the data `df`, `long_ticks`, `short_ticks`, and `profit_diffs` are available
def plot_price_with_actions_and_profits(df, long_ticks, short_ticks, profit_diffs):
    """Plot the close price with buy/sell markers and per-trade profit labels.

    The traces and annotations built here are handed to
    ``plot_html_with_annotations`` for rendering.

    Parameters:
    - df: DataFrame of price data; must contain 'Close' and 'date' columns.
    - long_ticks: row positions of buy (Long) signals.
    - short_ticks: row positions of sell (Short) signals.
    - profit_diffs: per-position profit change; positions beyond its length
      and entries equal to 0 get no label.
    """
    dates = df["date"]
    closes = df["Close"]

    def signal_markers(ticks, label, symbol, color):
        # One marker per tick, drawn at that row's date and close price.
        return go.Scatter(
            x=dates.iloc[ticks],
            y=closes.iloc[ticks],
            mode="markers",
            name=label,
            marker=dict(symbol=symbol, size=10, color=color),
        )

    def profit_labels(ticks, scale):
        # One arrow annotation per tick with a known, non-zero profit.
        labels = []
        known = len(profit_diffs)
        for tick in ticks:
            if tick >= known:
                continue
            gain = profit_diffs[tick]
            if gain == 0:
                continue
            labels.append(
                dict(
                    x=dates.iloc[tick],
                    y=closes.iloc[tick],
                    xref="x",
                    yref="y",
                    text=f"{gain * scale:.2f}",
                    showarrow=True,
                    arrowhead=2,
                    ax=0,
                    ay=-30,
                    font=dict(color="green" if gain > 0 else "red"),
                )
            )
        return labels

    traces = [
        go.Scatter(
            x=dates, y=closes, mode="lines", name="Price", line=dict(color="blue")
        ),
        signal_markers(long_ticks, "Buy Signal", "triangle-up", "green"),
        signal_markers(short_ticks, "Sell Signal", "triangle-down", "red"),
    ]

    # NOTE(review): buy labels are scaled by 1e6 while sell labels are shown
    # unscaled — confirm which scale is intended and make them agree.
    annotations = profit_labels(long_ticks, 1000000) + profit_labels(short_ticks, 1)

    plot_html_with_annotations(traces, dates, annotations)


def main():
    """Train an A2C agent on BTC/USD 15-minute bars and plot a backtest.

    Steps:
    1. Download OHLCV data with ``PolygonIODownloader``.
    2. Split it into train and trade ranges with FinRL's ``data_split``.
    3. Train A2C on a gym-anytrading ``forex-v0`` environment built from the
       train range.
    4. Replay the trade range with the trained policy, print the accumulated
       reward and final total profit, and plot position changes with the
       profit change recorded at each of them.

    Blocks on ``input()`` at the end so the process stays alive until the
    user presses Enter.
    """
    from finrl.meta.preprocessor.preprocessors import data_split

    DATA_START_DATE = "2017-01-01"
    DATA_END_DATE = "2024-10-09"
    # NOTE(review): the train and trade ranges are identical, so the backtest
    # below is in-sample. Use disjoint ranges for a meaningful evaluation.
    TRAIN_START_DATE = "2024-03-01"
    TRAIN_END_DATE = "2024-10-09"
    TRADE_START_DATE = "2024-03-01"
    TRADE_END_DATE = "2024-10-09"

    # NOTE(review): this directory is created but nothing below writes to it.
    TRAINED_MODEL_DIR = f"BTCUSD_15_minute_{TRAIN_START_DATE}_{TRAIN_END_DATE}_ForexEnv"
    TRAINED_MODEL_DIR = os.path.join("trained_models", TRAINED_MODEL_DIR)
    os.makedirs(TRAINED_MODEL_DIR, exist_ok=True)

    # Fetch data from PolygonIO
    df_ohlcv = PolygonIODownloader().fetch_ohlcv(
        ["X:BTCUSD"], DATA_START_DATE, DATA_END_DATE, "minute", 15
    )
    df_ohlcv = df_ohlcv.rename(columns={"timestamp": "date", "ticker": "tic"})

    df = df_ohlcv.sort_values(["date", "tic"]).reset_index(drop=True)

    # Custom indicator features are currently disabled. To enable them, call
    # extract_custom_features(df) here, before the columns are renamed.

    # Map the lowercase OHLCV names to the capitalised names used from here on
    # (the plotting code reads "Close").
    df = df.rename(
        columns={
            "close": "Close",
            "high": "High",
            "low": "Low",
            "open": "Open",
            "volume": "Volume",
        }
    )

    train = data_split(df, TRAIN_START_DATE, TRAIN_END_DATE)
    trade = data_split(df, TRADE_START_DATE, TRADE_END_DATE)

    print(f"Training Data Length: {len(train)}")
    print(f"Trading Data Length: {len(trade)}")

    window_size = 256  # Set appropriate window size

    def make_env(data):
        # frame_bound starts at window_size and runs to the end of the data.
        return gym.make(
            "forex-v0",
            df=data,
            window_size=window_size,
            frame_bound=(window_size, len(data)),
        )

    # Train on the train split; the original built its only environment from
    # the trade split and never used `train`.
    train_env = Monitor(make_env(train), filename="monitor_log")

    # Initialize and train A2C model
    model = A2C("MlpPolicy", train_env, verbose=1)
    model.learn(total_timesteps=1000000)

    # Backtest on the trade split in a separate environment.
    trade_env = make_env(trade)
    obs, info = trade_env.reset()
    actions = []
    total_profits = []  # total_profit reported after every step
    profit_diffs = []  # profit change at steps where the action flipped, else 0
    total_reward = 0
    for step in tqdm(range(len(trade)), desc="Backtesting Progress"):
        action, _states = model.predict(obs)
        actions.append(action)
        obs, reward, done, truncated, info = trade_env.step(action)

        total_reward += reward
        total_profits.append(info["total_profit"])

        if step > 0 and actions[step] != actions[step - 1]:
            profit_diffs.append(total_profits[step] - total_profits[step - 1])
        else:
            profit_diffs.append(0)

        if done or truncated:
            break

    # Report the accumulated reward and the final total profit.
    print(f"Total Reward: {total_reward}")
    if total_profits:
        print(f"Final Total Profit: {total_profits[-1]}")

    # Read the history through get_wrapper_attr rather than plain attribute
    # access, which does not reliably forward through wrappers.
    position_history = trade_env.get_wrapper_attr("get_position_history")()

    # Record the ticks at which the position changed.
    short_ticks = []
    long_ticks = []
    for tick in range(1, len(position_history)):
        position = position_history[tick]
        if position == position_history[tick - 1]:
            continue
        if position == Positions.Short:
            short_ticks.append(tick)
        elif position == Positions.Long:
            long_ticks.append(tick)

    # profit_diffs is indexed by backtest step, but the plot looks profits up
    # by tick. Left-pad it so the last step lines up with the last history
    # entry.
    # Assumes the history gains exactly one entry per env.step(), after the
    # entries present at reset — TODO confirm against the environment.
    lead = max(len(position_history) - len(profit_diffs), 0)
    profit_by_tick = [0] * lead + profit_diffs

    plot_price_with_actions_and_profits(trade, long_ticks, short_ticks, profit_by_tick)

    input("Press Enter to exit...")


# Run the download/train/backtest pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()