mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
support collecting yahoo 1min data
This commit is contained in:
@@ -136,7 +136,7 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
|
||||
- `volume`
|
||||
The adjusted trading volume
|
||||
- `factor`
|
||||
The Restoration factor. Normally, original_price = adj_price / factor
|
||||
The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_
|
||||
|
||||
In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import re
|
||||
import time
|
||||
import bisect
|
||||
import pickle
|
||||
import random
|
||||
import requests
|
||||
import functools
|
||||
from pathlib import Path
|
||||
@@ -17,6 +18,7 @@ from yahooquery import Ticker
|
||||
HS_SYMBOLS_URL = "http://app.finance.ifeng.com/hq/list.php?type=stock_a&class={s_type}"
|
||||
|
||||
CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg=19900101&end=20991231"
|
||||
SZSE_CALENDAR_URL = "http://www.szse.cn/api/report/exchange/onepersistenthour/monthList?month={month}&random={random}"
|
||||
|
||||
CALENDAR_BENCH_URL_MAP = {
|
||||
"CSI300": CALENDAR_URL_BASE.format(market=1, bench_code="000300"),
|
||||
@@ -63,7 +65,29 @@ def get_calendar_list(bench_code="CSI300") -> list:
|
||||
df = Ticker(CALENDAR_BENCH_URL_MAP[bench_code]).history(interval="1d", period="max")
|
||||
calendar = df.index.get_level_values(level="date").map(pd.Timestamp).unique().tolist()
|
||||
else:
|
||||
calendar = _get_calendar(CALENDAR_BENCH_URL_MAP[bench_code])
|
||||
if bench_code.upper() == "ALL":
|
||||
|
||||
@deco_retry
|
||||
def _get_calendar(month):
|
||||
_cal = []
|
||||
try:
|
||||
resp = requests.get(SZSE_CALENDAR_URL.format(month=month, random=random.random)).json()
|
||||
for _r in resp["data"]:
|
||||
if int(_r["jybz"]):
|
||||
_cal.append(pd.Timestamp(_r["jyrq"]))
|
||||
except Exception as e:
|
||||
raise ValueError(f"{month}-->{e}")
|
||||
return _cal
|
||||
|
||||
month_range = pd.date_range(start="2000-01", end=pd.Timestamp.now() + pd.Timedelta(days=31), freq="M")
|
||||
calendar = []
|
||||
for _m in month_range:
|
||||
cal = _get_calendar(_m.strftime("%Y-%m"))
|
||||
if cal:
|
||||
calendar += cal
|
||||
calendar = list(filter(lambda x: x <= pd.Timestamp.now(), calendar))
|
||||
else:
|
||||
calendar = _get_calendar(CALENDAR_BENCH_URL_MAP[bench_code])
|
||||
_CALENDAR_MAP[bench_code] = calendar
|
||||
logger.info(f"end of get calendar list: {bench_code}.")
|
||||
return calendar
|
||||
|
||||
@@ -18,23 +18,81 @@ pip install -r requirements.txt
|
||||
|
||||
## Collector Data
|
||||
|
||||
### Download data and Normalize data
|
||||
```bash
|
||||
python collector.py collector_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d --normalize_dir ~/.qlib/stock_data/normalize
|
||||
```
|
||||
|
||||
### Download Data
|
||||
### CN Data
|
||||
|
||||
#### 1d
|
||||
|
||||
```bash
|
||||
python collector.py download_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
|
||||
|
||||
# download from yahoo finance
|
||||
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
|
||||
|
||||
# normalize
|
||||
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d
|
||||
|
||||
# dump data
|
||||
cd qlib/scripts
|
||||
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol
|
||||
|
||||
# using
|
||||
import qlib
|
||||
from qlib.data import D
|
||||
|
||||
qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1d", region="CN")
|
||||
df = D.features(D.instruments("all"), ["$close"], freq="day")
|
||||
|
||||
```
|
||||
|
||||
### Normalize Data
|
||||
#### 1min
|
||||
|
||||
```bash
|
||||
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN
|
||||
|
||||
# download from yahoo finance
|
||||
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1min
|
||||
|
||||
# normalize
|
||||
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min
|
||||
|
||||
# dump data
|
||||
cd qlib/scripts
|
||||
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1min --freq 1min --exclude_fields date,adjclose,dividends,splits,symbol
|
||||
|
||||
# using
|
||||
import qlib
|
||||
from qlib.data import D
|
||||
|
||||
qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1min", region="CN")
|
||||
df = D.features(D.instruments("all"), ["$close"], freq="1min")
|
||||
|
||||
```
|
||||
|
||||
### US Data
|
||||
|
||||
#### 1d
|
||||
|
||||
```bash
|
||||
|
||||
# download from yahoo finance
|
||||
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --region US --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d
|
||||
|
||||
# normalize
|
||||
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/us_1d --normalize_dir ~/.qlib/stock_data/source/us_1d_nor --region US --interval 1d
|
||||
|
||||
# dump data
|
||||
cd qlib/scripts
|
||||
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_us_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol
|
||||
|
||||
# using
|
||||
import qlib
|
||||
from qlib.data import D
|
||||
|
||||
qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_us_1d", region="US")
|
||||
df = D.features(D.instruments("all"), ["$close"], freq="day")
|
||||
|
||||
```
|
||||
|
||||
|
||||
### Help
|
||||
```bash
|
||||
pythono collector.py collector_data --help
|
||||
@@ -42,5 +100,5 @@ pythono collector.py collector_data --help
|
||||
|
||||
## Parameters
|
||||
|
||||
- interval: 1m or 1d
|
||||
- interval: 1min or 1d
|
||||
- region: CN or US
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user