依據文檔建議,入門先看 `Stock_NeurIPS2018` 系列教學。
參考資料 Stock_NeurIPS2018_1_Data.ipynb
`close` 的值其實是原始資料中的 Adj Close(調整後收盤價),而原始的 `close` 資料會被拋棄。
date open high low close volume tic day
0 2020-01-02 74.059998 75.150002 73.797501 72.876114 135480400 aapl 3
1 2020-01-03 74.287498 75.144997 74.125000 72.167610 146322800 aapl 4
2 2020-01-06 73.447502 74.989998 73.187500 72.742683 118387200 aapl 0
import pandas as pd
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.config import INDICATORS
import itertools
# Small sanity-check download: AAPL only, January 2020.
# Per the note earlier in this file, the returned `close` column is expected
# to hold the adjusted close rather than the raw close.
aapl_downloader = YahooDownloader(
    start_date='2020-01-01',
    end_date='2020-01-31',
    ticker_list=['aapl'],
)
aapl_df_finrl = aapl_downloader.fetch_data()
# Notebook-style preview of the first rows.
aapl_df_finrl.head()
# Date boundaries for the training and trading datasets.
# The training end and the trading start are the same day.
# NOTE(review): confirm data_split treats the end date as exclusive so that
# the boundary day does not land in both datasets.
TRAIN_START_DATE, TRAIN_END_DATE = '2009-01-01', '2020-07-01'
TRADE_START_DATE, TRADE_END_DATE = '2020-07-01', '2021-10-29'

# One download covering the whole span (train start through trade end)
# for every ticker in config_tickers.DOW_30_TICKER.
dow30_downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
    ticker_list=config_tickers.DOW_30_TICKER,
)
df_raw = dow30_downloader.fetch_data()
# Notebook-style preview of the first rows.
df_raw.head()
# Feature-engineering configuration: technical indicators from INDICATORS,
# VIX and turbulence enabled, no user-defined features.
fe_options = dict(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
fe = FeatureEngineer(**fe_options)

# Run the preprocessing pipeline over the raw download.
processed = fe.preprocess_data(df_raw)
# Build a complete (date, tic) grid so every ticker has a row on every date
# that appears in `processed`.
list_ticker = processed["tic"].unique().tolist()

# Every calendar day between the first and last date, as strings.
first_day, last_day = processed['date'].min(), processed['date'].max()
list_date = list(pd.date_range(first_day, last_day).astype(str))

# Cartesian product: one (date, tic) pair per calendar day per ticker.
combination = list(itertools.product(list_date, list_ticker))

processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    # Left merge keeps every grid pair; pairs missing from `processed` get NaN.
    .merge(processed, on=["date", "tic"], how="left")
    # Keep only dates that occur in `processed` (presumably trading days).
    .loc[lambda grid: grid['date'].isin(processed['date'])]
    .sort_values(['date', 'tic'])
    # Remaining gaps (a ticker absent on a kept date) are filled with 0.
    .fillna(0)
)
# Notebook-style preview of the first rows.
processed_full.head()
# Slice the completed grid into the training and trading windows.
train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)

# Report the row count of each split (train first, then trade).
for split_frame in (train, trade):
    print(len(split_frame))

# Persist both splits as CSV files in the current working directory.
for split_frame, csv_path in ((train, 'train_data.csv'), (trade, 'trade_data.csv')):
    split_frame.to_csv(csv_path)