上市欄位比較少,所以上櫃對齊上市的欄位
其中有個註記欄位
上市
備註欄符號依序說明
1. 數字(1、2、3…):合計降低融資比率、提高融券保證金成數
2. O:停止融資
3. X:停止融券
4. @:融資分配
5. %:融券分配
6. *:融券餘額占融資餘額百分之六十以上者
7. A:股價波動過度劇烈
8. B:股權過度集中
9. C:成交量過度異常
10. D:監視第二次處置
11. 數字(1、2、3…):監視業務督導會報決議降低融資比率、提高融券保證金成數
上櫃
說明:
寫入兩張表,每日個股融資券添加到stock_day
{
"_id" : "2018/10/30_0050",
"市場別" : "上市",
"產業別" : "",
"name" : "元大台灣50",
"code" : "0050",
"date" : "2018/10/30",
"成交股數" : 6119,
"成交金額" : 464489,
"開盤價" : 75.8,
"最高價" : 76.25,
"最低價" : 75.5,
"收盤價" : 75.95,
"漲跌價差" : 0.2,
"成交筆數" : 2313,
"三大法人買賣超" : -1777.0,
"外資自營商買賣超" : 0.0,
"外資自營商買進" : 0.0,
"外資自營商賣出" : 0.0,
"外陸資買賣超" : -1749.0,
"外陸資買進" : 33.0,
"外陸資賣出" : 1782.0,
"投信買賣超" : 100.0,
"投信買進" : 100.0,
"投信賣出" : 0.0,
"自營商買賣超" : 186.0,
"自營商買賣超避險" : -314.0,
"自營商買進" : 292.0,
"自營商買進避險" : 1747.0,
"自營商賣出" : 106.0,
"自營商賣出避險" : 2061.0,
"融券今日餘額" : 1056,
"融券前日餘額" : 962,
"融券現金償還" : 0,
"融券買進" : 50,
"融券賣出" : 144,
"融券限額" : 201500,
"融資今日餘額" : 4386,
"融資前日餘額" : 4375,
"融資現金償還" : 2,
"融資買進" : 96,
"融資賣出" : 83,
"融資限額" : 201500,
"資券互抵" : 3,
"資券註記" : ""
}
當天的統計總額寫入到stock_margin_trading,其中金額單位是(仟元)
{
"_id" : ObjectId("5bd9a7ae8998344b6ec63912"),
"date" : "2018/10/30",
"市場別" : "上市",
"融資買進" : "149,018",
"融資賣出" : "169,272",
"融資現金償還" : "5,538",
"融資前日餘額" : "7,257,679",
"融資今日餘額" : "7,231,887",
"融券買進" : "57,292",
"融券賣出" : "79,670",
"融券現金償還" : "635",
"融券前日餘額" : "579,350",
"融券今日餘額" : "601,093",
"融資買進金額" : "2,992,718",
"融資賣出金額" : "3,697,797",
"融資現金償還金額" : "356,826",
"融資前日餘額金額" : "128,263,246",
"融資今日餘額金額" : "127,201,340"
}
{
"_id" : ObjectId("5bd9a7948998344b6ec63908"),
"date" : "2018/10/30",
"市場別" : "上櫃",
"融資買進" : "26,163",
"融資賣出" : "31,834",
"融資現金償還" : "1,771",
"融資前日餘額" : "1,809,526",
"融資今日餘額" : "1,802,084",
"融券買進" : "12,880",
"融券賣出" : "14,698",
"融券現金償還" : "165",
"融券前日餘額" : "142,193",
"融券今日餘額" : "143,846",
"融資買進金額" : "768,781",
"融資賣出金額" : "991,894",
"融資現金償還金額" : "122,714",
"融資前日餘額金額" : "43,860,143",
"融資今日餘額金額" : "43,514,316"
}
# -*- coding: utf-8 -*-
import json
import time
from datetime import datetime
import pandas as pd
import scrapy
from skhome.extensions import MongoDatabase
# Endpoint templates for the per-stock margin-trading reports.  Both are filled
# in with ``str.format(y=..., m=..., d=...)``; month and day are zero-padded to
# two digits.  The spider passes the Gregorian year to TWSE_URL and the year
# minus 1911 to TPEX_URL.
TWSE_URL = (
    'http://www.tse.com.tw/exchangeReport/MI_MARGN'
    '?response=json&date={y}{m:02d}{d:02d}&selectType=ALL'
)
TPEX_URL = (
    'http://www.tpex.org.tw/web/stock/margin_trading/margin_balance/margin_bal_result.php'
    '?l=zh-tw&o=json&d={y}/{m:02d}/{d:02d}'
)
# Keys of a per-stock margin-trading record, in the order the values are
# produced by ``parse_info``.
columns = [
    "_id",
    # Margin-purchase (融資) figures.
    "融資買進", "融資賣出", "融資現金償還",
    "融資前日餘額", "融資今日餘額", "融資限額",
    # Short-sale (融券) figures.
    "融券買進", "融券賣出", "融券現金償還",
    "融券前日餘額", "融券今日餘額", "融券限額",
    # Offset amount and the free-text remark column.
    "資券互抵", "資券註記",
]
# Keys of the market-wide daily summary document stored in the
# ``stock_margin_trading`` collection, in the order the spider supplies values.
creditFields = [
    "date", "市場別",
    # Margin-purchase (融資) totals.
    "融資買進", "融資賣出", "融資現金償還", "融資前日餘額", "融資今日餘額",
    # Short-sale (融券) totals.
    "融券買進", "融券賣出", "融券現金償還", "融券前日餘額", "融券今日餘額",
    # Margin-purchase totals expressed as amounts (金額).
    "融資買進金額", "融資賣出金額", "融資現金償還金額",
    "融資前日餘額金額", "融資今日餘額金額",
]
def parse_info(d, m):
    """Turn one per-stock margin-trading row into a dict keyed by ``columns``.

    Args:
        d: Sequence of string cells for a single stock.  ``d[0]`` is used as
            the stock code in the record id and the last cell is kept verbatim
            as the remark.
        m: Mapping that provides ``'date'`` and ``'市場別'``.

    Returns:
        A dict pairing ``columns`` with ``[_id, <integer cells...>, remark]``,
        where ``_id`` is ``"<date>_<d[0]>"``.

    Raises:
        ValueError: A numeric cell is not an integer once commas are removed.
        IndexError: A non-上市 row is shorter than the cells picked from it.
    """
    record_id = m['date'] + '_' + d[0]
    remark = d[-1]

    if m['市場別'] == '上市':
        # Every cell between the first two and the remark is numeric and is
        # taken in its existing order.
        raw_cells = d[2:-1]
    else:
        # Source positions of the numeric cells, listed in ``columns`` order so
        # that this market's rows line up with the 上市 layout.
        source_positions = (3, 4, 5, 2, 6, 9, 12, 11, 13, 10, 14, 17, 18)
        raw_cells = [d[pos] for pos in source_positions]

    # Numbers arrive as strings with thousands separators, e.g. "1,234".
    values = [int(cell.replace(',', '')) for cell in raw_cells]
    values.append(remark)
    return dict(zip(columns, [record_id, *values]))
class StockDaySpider(scrapy.Spider):
    """Spider that collects daily margin-trading figures for both markets.

    For every date in the requested range that already has at least one
    document in the ``stock_day`` collection, one request is sent to
    ``TWSE_URL`` (market ``上市``) and one to ``TPEX_URL`` (market ``上櫃``).
    Per-stock rows are yielded as items built by ``parse_info``; the
    market-wide totals are inserted directly into the ``stock_margin_trading``
    collection.

    Spider arguments:
        beginDate, endDate: Bounds of the date range, in any form accepted by
            ``pandas.date_range``.  If either one is missing, only today's
            date is processed.
    """

    name = 'stock_margin_trading'
    custom_settings = {
        'DOWNLOAD_DELAY': 8,
        'CONCURRENT_REQUESTS': 1,
        'MONGODB_COLLECTION': 'stock_day',
        'MONGODB_ITEM_CACHE': 1,
        'MONGODB_HAS_ID_FIELD': True,
        'COOKIES_ENABLED': False
    }

    def __init__(self, beginDate=None, endDate=None, *args, **kwargs):
        """Forward the date range to the base spider along with all other arguments."""
        super().__init__(*args, beginDate=beginDate, endDate=endDate, **kwargs)

    def start_requests(self):
        """Yield one request per market for each date found in ``stock_day``.

        Dates are visited from the most recent to the oldest.  Request pacing
        is left to ``DOWNLOAD_DELAY`` in ``custom_settings``; no blocking sleep
        is done here because that would stall Scrapy's event loop.
        """
        if self.beginDate and self.endDate:
            start, end = self.beginDate, self.endDate
        else:
            start = end = datetime.today().strftime("%Y-%m-%d")

        for date in pd.date_range(start, end)[::-1]:
            today = '{}/{:02d}/{:02d}'.format(date.year, date.month, date.day)

            # Only dates already present in ``stock_day`` are fetched.
            with MongoDatabase('stock_day') as conn:
                is_open = list(conn.collection.find({'date': today}, {'_id': 1}).limit(1))
            if not is_open:
                continue

            y, m, d = date.year, date.month, date.day
            yield scrapy.Request(TWSE_URL.format(y=y, m=m, d=d),
                                 meta={'date': today, '市場別': '上市'})
            # The 上櫃 endpoint is given the year minus 1911.
            yield scrapy.Request(TPEX_URL.format(y=y - 1911, m=m, d=d),
                                 meta={'date': today, '市場別': '上櫃'})

    @staticmethod
    def _market_totals(json_data, is_twse):
        """Return the market-wide summary values that follow date and market in ``creditFields``.

        Raises:
            KeyError: The payload has no summary section.
            IndexError: The summary section has fewer rows or cells than expected.
        """
        if is_twse:
            credit = json_data['creditList']
            # The first cell of each of the three summary rows is dropped.
            return [*credit[0][1:], *credit[1][1:], *credit[2][1:]]

        d1 = json_data['tfootData_one']
        d2 = json_data['tfootData_two']
        # Cells are reordered to match ``creditFields``.
        return [d1[1], d1[2], d1[3], d1[0], d1[4],
                d1[10], d1[9], d1[11], d1[8], d1[12],
                d2[1], d2[2], d2[3], d2[0], d2[4]]

    def parse(self, response):
        """Yield per-stock records and store the market-wide totals.

        Args:
            response: Response whose ``meta`` carries ``'date'`` and
                ``'市場別'`` and whose body is the JSON payload.

        Yields:
            One dict per stock row, as built by ``parse_info``.

        A payload without the expected sections is logged and skipped rather
        than treated as an error.
        """
        m = response.meta
        json_data = json.loads(response.text)
        is_twse = m['市場別'] == '上市'

        try:
            rows = json_data['data' if is_twse else 'aaData']
        except KeyError:
            self.logger.warning('No per-stock margin data for %s (%s)',
                                m['date'], m['市場別'])
            return
        for row in rows:
            yield parse_info(row, m)

        try:
            totals = self._market_totals(json_data, is_twse)
        except (KeyError, IndexError) as exc:
            self.logger.warning('No market-wide margin totals for %s (%s): %r',
                                m['date'], m['市場別'], exc)
            return

        item = dict(zip(creditFields, [m['date'], m['市場別'], *totals]))
        # NOTE(review): the item carries no ``_id``, so re-running the spider for
        # the same date presumably inserts a duplicate summary - consider an upsert.
        with MongoDatabase('stock_margin_trading') as conn:
            conn.collection.insert_one(item)