1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 10:01:19 +08:00

Merge branch 'microsoft:main' into stale

This commit is contained in:
Dingsu Wang
2021-05-09 17:59:46 +08:00
13 changed files with 236 additions and 28 deletions

View File

@@ -49,7 +49,7 @@ Your feedbacks about the features are very important.
| Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 |
| Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 |
| Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 |
| High-frequency trading | Initial opensource version under development |
| High-frequency trading | Under review: https://github.com/microsoft/qlib/pull/408 |
| Meta-Learning-based data selection | Initial opensource version under development |
Recent released features

View File

@@ -182,6 +182,11 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)
.. note::
PRs for new data source are highly welcome! Users could commit the code to crawl data as a PR like `the examples here <https://github.com/microsoft/qlib/tree/main/scripts>`_. And then we will use the code to create data cache on our server which other users could use directly.
Data API
========================

View File

@@ -522,6 +522,9 @@ class LocalCalendarProvider(CalendarProvider):
# if future calendar not exists, return current calendar
if not os.path.exists(fname):
get_module_logger("data").warning(f"{freq}_future.txt not exists, return current calendar!")
get_module_logger("data").warning(
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
)
fname = self._uri_cal.format(freq)
else:
fname = self._uri_cal.format(freq)

View File

@@ -15,9 +15,10 @@ from .config import C
class MetaLogger(type):
def __new__(cls, name, bases, dict):
wrapper_dict = logging.Logger.__dict__.copy()
wrapper_dict.update(dict)
wrapper_dict["__doc__"] = logging.Logger.__doc__
return type.__new__(cls, name, bases, wrapper_dict)
for key in wrapper_dict:
if key not in dict and key != "__reduce__":
dict[key] = wrapper_dict[key]
return type.__new__(cls, name, bases, dict)
class QlibLogger(metaclass=MetaLogger):
@@ -39,6 +40,9 @@ class QlibLogger(metaclass=MetaLogger):
self.level = level
def __getattr__(self, name):
# During unpickling, python will call __getattr__. Use this line to avoid maximum recursion error.
if name in {"__setstate__"}:
raise AttributeError
return self.logger.__getattribute__(name)
@@ -159,3 +163,10 @@ class LogFilter(logging.Filter):
elif isinstance(self.param, list):
allow = not any([self.match_msg(p, record.msg) for p in self.param])
return allow
def set_global_logger_level(level: int):
qlib_logger = logging.root.manager.loggerDict.get("qlib", None)
if qlib_logger is not None:
for _handler in qlib_logger.handlers:
_handler.level = level

View File

@@ -23,7 +23,10 @@ class QlibRecorder:
@contextmanager
def start(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
@@ -45,8 +48,12 @@ class QlibRecorder:
Parameters
----------
experiment_id : str
id of the experiment one wants to start.
experiment_name : str
name of the experiment one wants to start.
recorder_id : str
id of the recorder under the experiment one wants to start.
recorder_name : str
name of the recorder under the experiment one wants to start.
uri : str
@@ -57,7 +64,14 @@ class QlibRecorder:
resume : bool
whether to resume the specific recorder with given name under the given experiment.
"""
run = self.start_exp(experiment_name, recorder_name, uri, resume)
run = self.start_exp(
experiment_id=experiment_id,
experiment_name=experiment_name,
recorder_id=recorder_id,
recorder_name=recorder_name,
uri=uri,
resume=resume,
)
try:
yield run
except Exception as e:
@@ -65,7 +79,9 @@ class QlibRecorder:
raise e
self.end_exp(Recorder.STATUS_FI)
def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=False):
def start_exp(
self, *, experiment_id=None, experiment_name=None, recorder_id=None, recorder_name=None, uri=None, resume=False
):
"""
Lower level method for starting an experiment. When use this method, one should end the experiment manually
and the status of the recorder may not be handled properly. Here is the example code:
@@ -79,8 +95,12 @@ class QlibRecorder:
Parameters
----------
experiment_id : str
id of the experiment one wants to start.
experiment_name : str
the name of the experiment to be started
recorder_id : str
id of the recorder under the experiment one wants to start.
recorder_name : str
name of the recorder under the experiment one wants to start.
uri : str
@@ -93,7 +113,14 @@ class QlibRecorder:
-------
An experiment instance being started.
"""
return self.exp_manager.start_exp(experiment_name, recorder_name, uri, resume)
return self.exp_manager.start_exp(
experiment_id=experiment_id,
experiment_name=experiment_name,
recorder_id=recorder_id,
recorder_name=recorder_name,
uri=uri,
resume=resume,
)
def end_exp(self, recorder_status=Recorder.STATUS_FI):
"""

View File

@@ -39,12 +39,14 @@ class Experiment:
output["recorders"] = list(recorders.keys())
return output
def start(self, recorder_name=None, resume=False):
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
"""
Start the experiment and set it to be active. This method will also start a new recorder.
Parameters
----------
recorder_id : str
the id of the recorder to be created.
recorder_name : str
the name of the recorder to be created.
resume : bool
@@ -238,14 +240,14 @@ class MLflowExperiment(Experiment):
def __repr__(self):
return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info)
def start(self, recorder_name=None, resume=False):
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
logger.info(f"Experiment {self.id} starts running ...")
# Get or create recorder
if recorder_name is None:
recorder_name = self._default_rec_name
# resume the recorder
if resume:
recorder, _ = self._get_or_create_rec(recorder_name=recorder_name)
recorder, _ = self._get_or_create_rec(recorder_id=recorder_id, recorder_name=recorder_name)
# create a new recorder
else:
recorder = self.create_recorder(recorder_name)

View File

@@ -33,7 +33,10 @@ class ExpManager:
def start_exp(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
@@ -45,8 +48,12 @@ class ExpManager:
Parameters
----------
experiment_id : str
id of the active experiment.
experiment_name : str
name of the active experiment.
recorder_id : str
id of the recorder to be started.
recorder_name : str
name of the recorder to be started.
uri : str
@@ -298,7 +305,10 @@ class MLflowExpManager(ExpManager):
def start_exp(
self,
*,
experiment_id: Optional[Text] = None,
experiment_name: Optional[Text] = None,
recorder_id: Optional[Text] = None,
recorder_name: Optional[Text] = None,
uri: Optional[Text] = None,
resume: bool = False,
@@ -308,11 +318,11 @@ class MLflowExpManager(ExpManager):
# Create experiment
if experiment_name is None:
experiment_name = self._default_exp_name
experiment, _ = self._get_or_create_exp(experiment_name=experiment_name)
experiment, _ = self._get_or_create_exp(experiment_id=experiment_id, experiment_name=experiment_name)
# Set up active experiment
self.active_experiment = experiment
# Start the experiment
self.active_experiment.start(recorder_name, resume)
self.active_experiment.start(recorder_id=recorder_id, recorder_name=recorder_name, resume=resume)
return self.active_experiment

View File

@@ -15,7 +15,11 @@
### Download CN Data
```bash
# daily data
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
# 1min data (Optional for running non-high-frequency strategies)
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
```
### Downlaod US Data

View File

@@ -0,0 +1,24 @@
# Get future trading days
> `D.calendar(future=True)` will be used
## Requirements
```bash
pip install -r requirements.txt
```
## Collector Data
```bash
# parse instruments, using in qlib/instruments.
python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day
```
## Parameters
- qlib_dir: qlib data directory
- freq: value from [`day`, `1min`], default `day`

View File

@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
from typing import List
from pathlib import Path
import fire
import numpy as np
import pandas as pd
from loguru import logger
# get data from baostock
import baostock as bs
CUR_DIR = Path(__file__).resolve().parent
sys.path.append(str(CUR_DIR.parent.parent))
from data_collector.utils import generate_minutes_calendar_from_daily
def read_calendar_from_qlib(qlib_dir: Path) -> pd.DataFrame:
calendar_path = qlib_dir.joinpath("calendars").joinpath("day.txt")
if not calendar_path.exists():
return pd.DataFrame()
return pd.read_csv(calendar_path, header=None)
def write_calendar_to_qlib(qlib_dir: Path, date_list: List[str], freq: str = "day"):
calendar_path = str(qlib_dir.joinpath("calendars").joinpath(f"{freq}_future.txt"))
np.savetxt(calendar_path, date_list, fmt="%s", encoding="utf-8")
logger.info(f"write future calendars success: {calendar_path}")
def generate_qlib_calendar(date_list: List[str], freq: str) -> List[str]:
print(freq)
if freq == "day":
return date_list
elif freq == "1min":
date_list = generate_minutes_calendar_from_daily(date_list, freq=freq).tolist()
return list(map(lambda x: pd.Timestamp(x).strftime("%Y-%m-%d %H:%M:%S"), date_list))
else:
raise ValueError(f"Unsupported freq: {freq}")
def future_calendar_collector(qlib_dir: [str, Path], freq: str = "day"):
"""get future calendar
Parameters
----------
qlib_dir: str or Path
qlib data directory
freq: str
value from ["day", "1min"], by default day
"""
qlib_dir = Path(qlib_dir).expanduser().resolve()
if not qlib_dir.exists():
raise FileNotFoundError(str(qlib_dir))
lg = bs.login()
if lg.error_code != "0":
logger.error(f"login error: {lg.error_msg}")
return
# read daily calendar
daily_calendar = read_calendar_from_qlib(qlib_dir)
end_year = pd.Timestamp.now().year
if daily_calendar.empty:
start_year = pd.Timestamp.now().year
else:
start_year = pd.Timestamp(daily_calendar.iloc[-1, 0]).year
rs = bs.query_trade_dates(start_date=pd.Timestamp(f"{start_year}-01-01"), end_date=f"{end_year}-12-31")
data_list = []
while (rs.error_code == "0") & rs.next():
_row_data = rs.get_row_data()
if int(_row_data[1]) == 1:
data_list.append(_row_data[0])
data_list = sorted(data_list)
date_list = generate_qlib_calendar(data_list, freq=freq)
write_calendar_to_qlib(qlib_dir, date_list, freq=freq)
bs.logout()
logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31")
if __name__ == "__main__":
fire.Fire(future_calendar_collector)

View File

@@ -0,0 +1,5 @@
baostock
fire
numpy
pandas
loguru

View File

@@ -10,7 +10,9 @@ import random
import requests
import functools
from pathlib import Path
from typing import Iterable, Tuple
import numpy as np
import pandas as pd
from lxml import etree
from loguru import logger
@@ -418,5 +420,40 @@ def get_trading_date_by_shift(trading_list: list, trading_date: pd.Timestamp, sh
return res
def generate_minutes_calendar_from_daily(
calendars: Iterable,
freq: str = "1min",
am_range: Tuple[str, str] = ("09:30:00", "11:29:00"),
pm_range: Tuple[str, str] = ("13:00:00", "14:59:00"),
) -> pd.Index:
"""generate minutes calendar
Parameters
----------
calendars: Iterable
daily calendar
freq: str
by default 1min
am_range: Tuple[str, str]
AM Time Range, by default China-Stock: ("09:30:00", "11:29:00")
pm_range: Tuple[str, str]
PM Time Range, by default China-Stock: ("13:00:00", "14:59:00")
"""
daily_format: str = "%Y-%m-%d"
res = []
for _day in calendars:
for _range in [am_range, pm_range]:
res.append(
pd.date_range(
f"{pd.Timestamp(_day).strftime(daily_format)} {_range[0]}",
f"{pd.Timestamp(_day).strftime(daily_format)} {_range[1]}",
freq=freq,
)
)
return pd.Index(sorted(set(np.hstack(res))))
if __name__ == "__main__":
assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM

View File

@@ -24,7 +24,12 @@ from qlib.config import REG_CN as REGION_CN
CUR_DIR = Path(__file__).resolve().parent
sys.path.append(str(CUR_DIR.parent.parent))
from data_collector.base import BaseCollector, BaseNormalize, BaseRun
from data_collector.utils import get_calendar_list, get_hs_stock_symbols, get_us_stock_symbols
from data_collector.utils import (
get_calendar_list,
get_hs_stock_symbols,
get_us_stock_symbols,
generate_minutes_calendar_from_daily,
)
INDEX_BENCH_URL = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.{index_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={begin}&end={end}"
@@ -418,21 +423,9 @@ class YahooNormalize1min(YahooNormalize, ABC):
return calendar_list_1d
def generate_1min_from_daily(self, calendars: Iterable) -> pd.Index:
res = []
daily_format = self.DAILY_FORMAT
am_range = self.AM_RANGE
pm_range = self.PM_RANGE
for _day in calendars:
for _range in [am_range, pm_range]:
res.append(
pd.date_range(
f"{_day.strftime(daily_format)} {_range[0]}",
f"{_day.strftime(daily_format)} {_range[1]}",
freq="1min",
)
)
return pd.Index(sorted(set(np.hstack(res))))
return generate_minutes_calendar_from_daily(
calendars, freq="1min", am_range=self.AM_RANGE, pm_range=self.PM_RANGE
)
def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame:
# TODO: using daily data factor