目前已經寫好對路徑C:/Users/011328/Desktop/python/globle/grid_j2yzvjrl.xlsx"
的運算,目前困難點是 我要對資料目錄(C:/Users/011328/Desktop/python/globle/)
所有檔案進行以下的腳本運算,要如何寫?是要用迴圈嗎,因為每天檔案不固定,pandas的路徑pd.read_excel似乎只能指定檔名?
import pandas as pd
data = pd.read_excel("C:/Users/011328/Desktop/python/globle/grid_j2yzvjrl.xlsx") # 把 excel 格式的檔案讀取成一個 DataFrame
print(data.columns) # 印出欄位名稱
TQ = data["Qty"].sum()
print("股數總和",TQ)
amount = data['Px']*data['Qty']
data['Amount'] = amount
TAmount = amount.sum()
#新增均價欄位 並計算均價
Aver_P = TAmount/TQ
print(Aver_P)
new = pd.DataFrame({'Security':'',
'交易所代碼':'',
'Qty':TQ,
'Px':'',
'Fill Time':'',
'Exec Brkr':'',
'Maker/Taker':'',
'Amount':TAmount,
'N':Aver_P},index=[1])
data1 = data.append(new, ignore_index= True)
print(data1)
之前寫過類似的作法提供樓主參考
(函式 process_xlsx_with_pandas 請據需求自行改寫)
import os
import pandas as pd
# 列出dir_loc目錄所有檔案,可用allow_file_type指定檔案副檔名類型
def dir_list(dir_loc,allow_file_type=[]):
tmp_list=[]
#print('allow_file_type length:'+len(allow_file_type))
for path, subdirs, files in os.walk(dir_loc):
for name in files:
is_match=False
if len(allow_file_type)>0:
if get_file_type(name) in allow_file_type:
is_match=True
else:
is_match=True
if is_match==True:
tmp_list.append(os.path.join(path, name))
return tmp_list
# 取得副檔名格式
def get_file_type(file_path_and_name):
return file_path_and_name.split(".")[-1]
# 這裡請樓主自己改寫
def process_xlsx_with_pandas(xlsx_loc):
print("PROCESSED XLSX NAME: "+xlsx_loc)
data = pd.read_excel(xlsx_loc)
... 略 ...
print(data1)
if __name__ == '__main__':
xlsx_dir_path=r"C:\Users\011328\Desktop\python\globle\"
xlsx_list=dir_list(xlsx_dir_path,["xlsx"])
for i in xlsx_list:
process_xlsx_with_pandas(i)
尋找多層目錄會用
import os
os.walk(path)
只有一層目錄會用
import glob
list_array = glob.glob(path+"*.xlsx")
哈 .... 我用 linux ...
windows 理論上也可以