1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Add future trading date collector

This commit is contained in:
zhupr
2021-04-27 21:20:47 +08:00
parent e15ea06122
commit 8b8d21107c
6 changed files with 165 additions and 16 deletions

View File

@@ -0,0 +1,24 @@
# Get future trading days
> The collected calendar is what `D.calendar(future=True)` will use
## Requirements
```bash
pip install -r requirements.txt
```
## Collect Data
```bash
# collect future trading dates and write them to <qlib_dir>/calendars/<freq>_future.txt
python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day
```
## Parameters
- qlib_dir: qlib data directory
- freq: value from [`day`, `1min`], default `day`

View File

@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from typing import List
from pathlib import Path
import fire
import numpy as np
import pandas as pd
from loguru import logger
# get data from baostock
import baostock as bs
CUR_DIR = Path(__file__).resolve().parent
sys.path.append(str(CUR_DIR.parent.parent))
from data_collector.utils import generate_minutes_calendar_from_daily
def read_calendar_from_qlib(qlib_dir: Path) -> pd.DataFrame:
    """Read the daily calendar stored in a qlib data directory.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory; the calendar is read from ``calendars/day.txt``

    Returns
    -------
    pd.DataFrame
        single-column frame (no header) with one calendar entry per row;
        an empty frame when the file is missing or contains no data
    """
    calendar_path = qlib_dir.joinpath("calendars").joinpath("day.txt")
    if not calendar_path.exists():
        return pd.DataFrame()
    try:
        return pd.read_csv(calendar_path, header=None)
    except pd.errors.EmptyDataError:
        # An existing but empty day.txt is treated like a missing calendar
        # so that callers only have to check ``.empty``.
        return pd.DataFrame()
def write_calendar_to_qlib(qlib_dir: Path, date_list: List[str], freq: str = "day"):
    """Write the future calendar into a qlib data directory.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory; the file is written to ``calendars/{freq}_future.txt``
    date_list: List[str]
        calendar entries, written one per line
    freq: str
        frequency used in the file name, by default "day"
    """
    calendar_path = qlib_dir.joinpath("calendars").joinpath(f"{freq}_future.txt")
    # The collector may run against a directory that has no calendars folder
    # yet (no daily calendar found), so make sure the target folder exists.
    calendar_path.parent.mkdir(parents=True, exist_ok=True)
    np.savetxt(str(calendar_path), date_list, fmt="%s", encoding="utf-8")
    logger.info(f"write future calendars success: {calendar_path}")
def generate_qlib_calendar(date_list: List[str], freq: str) -> List[str]:
    """Convert a list of trading days into calendar entries for ``freq``.

    Parameters
    ----------
    date_list: List[str]
        daily trading dates
    freq: str
        value from ["day", "1min"]

    Returns
    -------
    List[str]
        ``date_list`` itself for "day"; for "1min" the minute-level
        timestamps formatted as "%Y-%m-%d %H:%M:%S"

    Raises
    ------
    ValueError
        if ``freq`` is neither "day" nor "1min"
    """
    if freq == "day":
        return date_list
    if freq == "1min":
        minutes = generate_minutes_calendar_from_daily(date_list, freq=freq).tolist()
        return [pd.Timestamp(_minute).strftime("%Y-%m-%d %H:%M:%S") for _minute in minutes]
    raise ValueError(f"Unsupported freq: {freq}")
def future_calendar_collector(qlib_dir: [str, Path], freq: str = "day"):
    """get future calendar

    Queries baostock for the trading dates from January 1st of the year of the
    last entry in the local daily calendar (or of the current year when no
    daily calendar is found) to December 31st of the current year, then writes
    them to ``calendars/{freq}_future.txt`` inside ``qlib_dir``.

    Parameters
    ----------
    qlib_dir: str or Path
        qlib data directory
    freq: str
        value from ["day", "1min"], by default day

    Raises
    ------
    FileNotFoundError
        if ``qlib_dir`` does not exist
    ValueError
        if ``freq`` is not supported
    """
    qlib_dir = Path(qlib_dir).expanduser().resolve()
    if not qlib_dir.exists():
        raise FileNotFoundError(str(qlib_dir))

    lg = bs.login()
    if lg.error_code != "0":
        logger.error(f"login error: {lg.error_msg}")
        return

    try:
        # read daily calendar
        daily_calendar = read_calendar_from_qlib(qlib_dir)
        end_year = pd.Timestamp.now().year
        if daily_calendar.empty:
            start_year = end_year
        else:
            start_year = pd.Timestamp(daily_calendar.iloc[-1, 0]).year

        # Pass both bounds as plain "YYYY-MM-DD" strings so they share one format.
        rs = bs.query_trade_dates(start_date=f"{start_year}-01-01", end_date=f"{end_year}-12-31")
        data_list = []
        # `and` short-circuits: rows are not fetched once an error is reported.
        while rs.error_code == "0" and rs.next():
            _row_data = rs.get_row_data()
            # keep the row's date only when its second field is the flag 1
            if int(_row_data[1]) == 1:
                data_list.append(_row_data[0])
        if rs.error_code != "0":
            # Do not overwrite an existing future calendar with partial data.
            logger.error(f"query trade dates error: {rs.error_msg}")
            return
    finally:
        # Always release the baostock session, even when something fails.
        bs.logout()

    date_list = generate_qlib_calendar(sorted(data_list), freq=freq)
    write_calendar_to_qlib(qlib_dir, date_list, freq=freq)
    logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31")
# Command-line entry point: python-fire exposes the parameters of
# future_calendar_collector (qlib_dir, freq) as command-line flags.
if __name__ == "__main__":
    fire.Fire(future_calendar_collector)

View File

@@ -0,0 +1,5 @@
baostock
fire
numpy
pandas
loguru

View File

@@ -10,7 +10,9 @@ import random
import requests
import functools
from pathlib import Path
from typing import Iterable, Tuple
import numpy as np
import pandas as pd
from lxml import etree
from loguru import logger
@@ -418,5 +420,40 @@ def get_trading_date_by_shift(trading_list: list, trading_date: pd.Timestamp, sh
return res
def generate_minutes_calendar_from_daily(
    calendars: Iterable,
    freq: str = "1min",
    am_range: Tuple[str, str] = ("09:30:00", "11:29:00"),
    pm_range: Tuple[str, str] = ("13:00:00", "14:59:00"),
) -> pd.Index:
    """generate minutes calendar

    Parameters
    ----------
    calendars: Iterable
        daily calendar
    freq: str
        by default 1min
    am_range: Tuple[str, str]
        AM Time Range, by default China-Stock: ("09:30:00", "11:29:00")
    pm_range: Tuple[str, str]
        PM Time Range, by default China-Stock: ("13:00:00", "14:59:00")

    Returns
    -------
    pd.Index
        sorted, de-duplicated timestamps covering both sessions of every
        day in ``calendars``; an empty DatetimeIndex when ``calendars``
        is empty
    """
    daily_format: str = "%Y-%m-%d"
    res = []
    for _day in calendars:
        # Format the day once; it is shared by both sessions.
        _day_str = pd.Timestamp(_day).strftime(daily_format)
        for _start, _end in (am_range, pm_range):
            res.append(pd.date_range(f"{_day_str} {_start}", f"{_day_str} {_end}", freq=freq))
    if not res:
        # np.hstack raises on an empty sequence; return an empty calendar instead.
        return pd.DatetimeIndex([])
    return pd.Index(sorted(set(np.hstack(res))))
# Sanity check when the module is executed directly: the HS stock symbol
# list must contain at least MINIMUM_SYMBOLS_NUM entries.
if __name__ == "__main__":
    assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM

View File

@@ -24,7 +24,12 @@ from qlib.config import REG_CN as REGION_CN
CUR_DIR = Path(__file__).resolve().parent
sys.path.append(str(CUR_DIR.parent.parent))
from data_collector.base import BaseCollector, BaseNormalize, BaseRun
from data_collector.utils import get_calendar_list, get_hs_stock_symbols, get_us_stock_symbols
from data_collector.utils import (
get_calendar_list,
get_hs_stock_symbols,
get_us_stock_symbols,
generate_minutes_calendar_from_daily,
)
INDEX_BENCH_URL = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.{index_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={begin}&end={end}"
@@ -418,21 +423,9 @@ class YahooNormalize1min(YahooNormalize, ABC):
return calendar_list_1d
def generate_1min_from_daily(self, calendars: Iterable) -> pd.Index:
    """Build the 1-minute calendar for the given daily calendar.

    For every day in ``calendars`` a 1-minute ``pd.date_range`` is created for
    both ``self.AM_RANGE`` and ``self.PM_RANGE``; the result is returned as a
    sorted, de-duplicated ``pd.Index``.
    """
    res = []
    daily_format = self.DAILY_FORMAT
    am_range = self.AM_RANGE
    pm_range = self.PM_RANGE
    for _day in calendars:
        for _range in [am_range, pm_range]:
            res.append(
                pd.date_range(
                    f"{_day.strftime(daily_format)} {_range[0]}",
                    f"{_day.strftime(daily_format)} {_range[1]}",
                    freq="1min",
                )
            )
    return pd.Index(sorted(set(np.hstack(res))))
    # NOTE(review): the statement below is unreachable as written. This text
    # comes from a diff view, so it is presumably the replacement body that
    # delegates to the shared helper -- confirm which version is intended.
    return generate_minutes_calendar_from_daily(
        calendars, freq="1min", am_range=self.AM_RANGE, pm_range=self.PM_RANGE
    )
def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame:
# TODO: using daily data factor