每個禮拜公佈一次,可以知道千張大戶的變化。通常下跌的時候大戶持股減少、散戶增多,不過也只有週末才能知道這個禮拜的變化。如果大戶大量買進或賣出,而股價還沒反應,這是個不錯的用來判斷該買或該賣的依據。我常常看這個指標來決定是否買賣,之前是去「神秘金字塔」這個網站看。
import requests
import pandas as pd
# Download the TDCC (Taiwan Depository & Clearing Corporation) weekly
# shareholding-distribution table from the open-data endpoint as CSV.
# NOTE(review): assumes the endpoint still serves a pandas-parseable CSV —
# confirm the URL is live before relying on it.
url='https://smart.tdcc.com.tw/opendata/getOD.ashx?id=1-5'
data=pd.read_csv(url)
# Notebook-style peek at the first 17 rows (one stock's 17 holding levels).
data[:17]
有關本公司集保戶股權分散表持股分級的定義,說明如下:
# Every Friday from 2017/10/06 through 2018/10/05 — the table is published
# weekly and dated on Fridays, so these are the valid query dates.
pd.date_range('2017/10/06', '2018/10/05', freq='W-FRI')
DatetimeIndex(['2017-10-06', '2017-10-13', '2017-10-20', '2017-10-27',
'2017-11-03', '2017-11-10', '2017-11-17', '2017-11-24',
'2017-12-01', '2017-12-08', '2017-12-15', '2017-12-22',
'2017-12-29', '2018-01-05', '2018-01-12', '2018-01-19',
'2018-01-26', '2018-02-02', '2018-02-09', '2018-02-16',
'2018-02-23', '2018-03-02', '2018-03-09', '2018-03-16',
'2018-03-23', '2018-03-30', '2018-04-06', '2018-04-13',
'2018-04-20', '2018-04-27', '2018-05-04', '2018-05-11',
'2018-05-18', '2018-05-25', '2018-06-01', '2018-06-08',
'2018-06-15', '2018-06-22', '2018-06-29', '2018-07-06',
'2018-07-13', '2018-07-20', '2018-07-27', '2018-08-03',
'2018-08-10', '2018-08-17', '2018-08-24', '2018-08-31',
'2018-09-07', '2018-09-14', '2018-09-21', '2018-09-28',
'2018-10-05'],
dtype='datetime64[ns]', freq='W-FRI')
from htmltable_df.extractor import Extractor
# TDCC per-stock shareholding-distribution query endpoint (form POST).
url='https://www.tdcc.com.tw/smWeb/QryStockAjax.do'
# Form fields exactly as submitted by the TDCC query page.
payload={
'scaDates':'20181005',  # query date, YYYYMMDD (must be a Friday)
'scaDate':'20181005',
'SqlMethod':'StockNo',  # query by stock number (vs. by name)
'StockNo':'2330',       # TSMC, as the example
'radioStockNo':'2330',
'StockName':'',
'REQ_OPR':'SELECT',
'clkStockNo':'2330',
'clkStockName':''
}
html=requests.post(url,data=payload).text
# Extract the second <table class="mt"> on the page into a DataFrame.
# NOTE(review): presumably df(1) means "use row 1 as the header" — confirm
# against the htmltable_df documentation.
data=Extractor(html,'table.mt:eq(1)').df(1)
data
# Drop the redundant level column and repair the last row's 序 (sequence
# number), which the site renders differently for the total row.
del data['持股/單位數分級']
data.loc[15,'序']=17
用 scrapy 重寫,並加入排程
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
import pandas as pd
import scrapy
from htmltable_df.extractor import Extractor
from skhome.extensions import MongoDatabase
class StockDaySpider(scrapy.Spider):
    """Weekly TDCC shareholding-distribution spider.

    For every stock/ETF code stored in the ``stock_code`` Mongo collection,
    POSTs the TDCC query form once per Friday in the requested date range and
    yields one item per holding-level row of the returned table.

    Spider arguments:
        beginDate, endDate: inclusive crawl range as ``YYYY/MM/DD`` strings.
            Any bound left unset defaults to two days ago, so a bare run
            fetches the most recently published Friday.
    """
    name = 'stock_hold'
    custom_settings = {
        'DOWNLOAD_DELAY': 2,        # be polite: at most one request every 2 s
        'CONCURRENT_REQUESTS': 1,
        'MONGODB_COLLECTION': name,
        'MONGODB_ITEM_CACHE': 1,
        # One document per (date, code, holding level) triple.
        'MONGODB_UNIQ_KEY': [("date", -1), ("code", 1), ("持股分級", -1)],
        'COOKIES_ENABLED': False
    }

    def __init__(self, beginDate=None, endDate=None, *args, **kwargs):
        # Spider.__init__ stores keyword args as attributes, so
        # self.beginDate / self.endDate exist after this call.
        super(StockDaySpider, self).__init__(beginDate=beginDate, endDate=endDate, *args, **kwargs)

    def start_requests(self):
        # Default each missing bound independently.  (Previously both were
        # defaulted only when BOTH were missing, so supplying just one of
        # beginDate/endDate crashed pd.date_range with a None endpoint.)
        default_date = (datetime.today() - timedelta(days=2)).strftime("%Y/%m/%d")
        if not self.beginDate:
            self.beginDate = default_date
        if not self.endDate:
            self.endDate = default_date
        with MongoDatabase('stock_code') as collection:
            stock_code = collection.export_df({'dtype': {'$in': ['股票', 'ETF']}})
        url = 'https://www.tdcc.com.tw/smWeb/QryStockAjax.do'
        # TDCC publishes the table every Friday; walk the Fridays newest-first.
        for date in pd.date_range(self.beginDate, self.endDate, freq='W-FRI')[::-1]:
            scaDate = date.strftime('%Y%m%d')      # form wants YYYYMMDD
            date = date.strftime('%Y/%m/%d')       # stored item date format
            for s in stock_code.itertuples():
                payload = {
                    'scaDates': scaDate,
                    'scaDate': scaDate,
                    'SqlMethod': 'StockNo',
                    'StockNo': s.code,
                    'radioStockNo': s.code,
                    'StockName': '',
                    'REQ_OPR': 'SELECT',
                    'clkStockNo': s.code,
                    'clkStockName': ''
                }
                # dont_filter: every (date, code) pair hits the same URL, so
                # scrapy's duplicate filter must not drop them.
                yield scrapy.FormRequest(url, formdata=payload, meta={'code': s.code, 'date': date},
                                         dont_filter=True)

    def parse(self, response):
        m = response.meta
        # NOTE(review): response.dom is not a stock scrapy attribute —
        # presumably provided by project middleware; verify.
        data = Extractor(response.dom, 'table.mt:eq(1)').df(1)
        # Drop the redundant level column and repair the total row's 序,
        # which the site renders differently from the data rows.
        del data['持股/單位數分級']
        data.loc[15, '序'] = 17
        data.columns = ['持股分級', '人數', '股數', '佔集保庫存數比例%']
        data.insert(0, 'code', m['code'])
        data.insert(0, 'date', m['date'])
        # 'records' (one dict per row) — the old 'row' abbreviation is a
        # deprecated alias removed from modern pandas.
        for item in data.to_dict('records'):
            yield item