# -*- coding: utf-8 -*-
import pandas as pd
from tkinter import filedialog
import tkinter as tk
from tkinter import StringVar
from tkinter import ttk
from statsmodels.tsa.arima_model import ARIMA
from datetime import datetime,timedelta,date,time
import warnings
# Suppress every warning for the whole process. Note that this also hides
# deprecation notices emitted by the libraries imported above.
warnings.filterwarnings('ignore')
def _clear_tables():
    """Destroy Treeviews left in ``root`` by an earlier run so tables do not stack up."""
    for widget in root.winfo_children():
        if isinstance(widget, ttk.Treeview):
            widget.destroy()


def _show_table(frame, rely, relheight):
    """Show *frame* in a new Treeview occupying a horizontal band of ``root``.

    The tree column shows the row index; one centred column is created per
    DataFrame column, and every row contributes exactly those columns.
    """
    headings = list(frame.columns)
    table = ttk.Treeview(root)
    table["columns"] = headings
    for heading in headings:
        table.column(heading, anchor="center")
        table.heading(heading, text=heading, anchor='center')
    for index, row in frame.iterrows():
        table.insert("", 'end', text=index, values=list(row))
    # ``place`` alone positions the widget; a preceding ``pack`` would be
    # overridden immediately, so it is not issued.
    table.place(relx=0, rely=rely, relheight=relheight, relwidth=1)
    return table


def op():
    """Load a Yahoo Finance Excel sheet, forecast the closing price, show both tables.

    Reads the forecast horizon (in days) from ``gap01_entry``, asks the user
    for an Excel file, fits an ARIMA model to the first difference of the
    ``Close`` column and displays the observed closes together with the
    forecast, restored from differences back to price levels.

    Returns early (after reporting through ``result_label``) when no day
    count was entered, and silently when the file dialog is cancelled.
    """
    try:
        afew = int(gap01_entry.get())
    except ValueError:
        # The entry only accepts digits, so this is the "field left empty" case.
        print('沒輸入天數')
        result_label.configure(text='沒輸入天數')
        return
    # Clear a message left over from an earlier failed attempt.
    result_label.configure(text='')

    filename = filedialog.askopenfilename(
        title='選擇 yahoo finance excel ',
        filetypes=[('Excel', '*.xlsx'), ('All Files', '*')],
    )
    if not filename:
        # Dialog cancelled: nothing to load.
        return

    # ``squeeze`` is intentionally not passed: the code below needs a
    # DataFrame with a 'Close' column, and the keyword no longer exists in
    # current pandas releases.
    df = pd.read_excel(filename, header=0, parse_dates=True, index_col=0)
    last_day = df.index.max()

    # Align the closes on a daily calendar, then drop the days without a quote.
    calendar = pd.date_range(df.index.min(), last_day)
    res = df['Close'].reindex(calendar).dropna()

    # 'B' orders the index by business day (Monday to Friday);
    # 'D' would order it by every calendar day.
    mode = 'B'
    res.index = pd.DatetimeIndex(res.index).to_period(mode)

    # Series -> DataFrame, then first difference for the model input.
    df1 = pd.DataFrame(res)
    ddf1 = df1.diff().dropna()
    print(ddf1)

    _clear_tables()
    # Rows come from the same frame as the headings, so the value shown under
    # "Close" really is the close (not the first column of the sheet).
    _show_table(df1, rely=0.1, relheight=0.5)

    # NOTE(review): the input is already differenced once and the order
    # (1, 2, 0) asks the model for two further differences -- confirm that
    # this much differencing is intended.
    # NOTE(review): this uses the legacy ``statsmodels.tsa.arima_model`` API
    # imported at the top of the file; newer statsmodels releases no longer
    # ship it -- verify the installed version.
    model_fit = ARIMA(ddf1, (1, 2, 0)).fit(disp=0)
    pred = model_fit.predict(
        start=last_day,
        end=last_day + timedelta(afew),
        typ='levels',
        dynamic=True,
    )

    # Undo the differencing. ``pred`` holds predicted day-over-day changes,
    # so a future price is the LAST observed close plus the running sum of
    # the changes predicted after it. Only out-of-sample steps are kept: the
    # first predicted step belongs to the last observed day, whose change is
    # already contained in the anchor price.
    future_changes = pred.loc[last_day + timedelta(1):last_day + timedelta(afew)]
    restored = res.iloc[-1] + future_changes.cumsum()

    dfp = pd.DataFrame(restored)
    dfp.columns = [str(afew) + ' 天左右的預測價格']
    print(res)
    print(dfp)

    # The two tables share the area below the controls: 0.1-0.6 and 0.6-1.0.
    # The lower one gets the remaining 0.4 so it does not run off the window.
    _show_table(dfp, rely=0.6, relheight=0.4)
def main():
    """Build the main window and hand control to the Tk event loop.

    Publishes ``root``, ``gap01_entry`` and ``result_label`` as module
    globals so that the button callback ``op`` can reach them.
    """
    global root, gap01_entry, result_label

    def digits_only(proposed):
        # Accept an empty field or text consisting solely of digits.
        return proposed == "" or proposed.isdigit()

    root = tk.Tk()
    root.geometry("800x600")

    # Numeric-only entry asking how many days of data are wanted.
    days_var = StringVar()
    validator = root.register(digits_only)

    gap01_frame = ttk.Frame(root)
    gap01_frame.pack(side=tk.TOP)

    prompt = ttk.Label(
        gap01_frame,
        text='請輸入想要幾天的資料(預設六十二天 約略預測二個月)',
    )
    prompt.pack(side=tk.LEFT)

    gap01_entry = ttk.Entry(
        gap01_frame,
        textvariable=days_var,
        validate='key',
        validatecommand=(validator, '%P'),
    )
    gap01_entry.pack(side=tk.LEFT)
    gap01_entry.insert(0, "62")

    # Status line used by the callback to report problems.
    result_label = tk.Label(root)
    result_label.pack()

    open_button = tk.Button(root, text="打開 yahoo finance excel ", command=op)
    open_button.pack()

    root.mainloop()
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
有無使用 .cumsum() 還原差分的差別:
有:先將原始資料差分,再將差分後的資料置入 ARIMA,並將 ARIMA 的預測結果以 .cumsum() 還原回來。
預測結果漲跌幅度大。
沒有:直接將原始資料置入 ARIMA。
預測結果波動不大,趨近於某一值。
不好意思打擾各位大大了!請問各位大大,有什麼正確還原差分(把預測出的差分值轉回原始價格)的辦法呢?謝謝!