mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-01 10:01:19 +08:00
Merge branch 'microsoft:main' into stale
This commit is contained in:
@@ -49,7 +49,7 @@ Your feedbacks about the features are very important.
|
||||
| Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 |
|
||||
| Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 |
|
||||
| Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 |
|
||||
| High-frequency trading | Initial opensource version under development |
|
||||
| High-frequency trading | Under review: https://github.com/microsoft/qlib/pull/408 |
|
||||
| Meta-Learning-based data selection | Initial opensource version under development |
|
||||
|
||||
Recent released features
|
||||
|
||||
@@ -182,6 +182,11 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
|
||||
qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
PRs for new data source are highly welcome! Users could commit the code to crawl data as a PR like `the examples here <https://github.com/microsoft/qlib/tree/main/scripts>`_. And then we will use the code to create data cache on our server which other users could use directly.
|
||||
|
||||
|
||||
Data API
|
||||
========================
|
||||
|
||||
|
||||
@@ -522,6 +522,9 @@ class LocalCalendarProvider(CalendarProvider):
|
||||
# if future calendar not exists, return current calendar
|
||||
if not os.path.exists(fname):
|
||||
get_module_logger("data").warning(f"{freq}_future.txt not exists, return current calendar!")
|
||||
get_module_logger("data").warning(
|
||||
"You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md"
|
||||
)
|
||||
fname = self._uri_cal.format(freq)
|
||||
else:
|
||||
fname = self._uri_cal.format(freq)
|
||||
|
||||
17
qlib/log.py
17
qlib/log.py
@@ -15,9 +15,10 @@ from .config import C
|
||||
class MetaLogger(type):
|
||||
def __new__(cls, name, bases, dict):
|
||||
wrapper_dict = logging.Logger.__dict__.copy()
|
||||
wrapper_dict.update(dict)
|
||||
wrapper_dict["__doc__"] = logging.Logger.__doc__
|
||||
return type.__new__(cls, name, bases, wrapper_dict)
|
||||
for key in wrapper_dict:
|
||||
if key not in dict and key != "__reduce__":
|
||||
dict[key] = wrapper_dict[key]
|
||||
return type.__new__(cls, name, bases, dict)
|
||||
|
||||
|
||||
class QlibLogger(metaclass=MetaLogger):
|
||||
@@ -39,6 +40,9 @@ class QlibLogger(metaclass=MetaLogger):
|
||||
self.level = level
|
||||
|
||||
def __getattr__(self, name):
|
||||
# During unpickling, python will call __getattr__. Use this line to avoid maximum recursion error.
|
||||
if name in {"__setstate__"}:
|
||||
raise AttributeError
|
||||
return self.logger.__getattribute__(name)
|
||||
|
||||
|
||||
@@ -159,3 +163,10 @@ class LogFilter(logging.Filter):
|
||||
elif isinstance(self.param, list):
|
||||
allow = not any([self.match_msg(p, record.msg) for p in self.param])
|
||||
return allow
|
||||
|
||||
|
||||
def set_global_logger_level(level: int):
    """Set the level of every handler attached to the top-level ``qlib`` logger.

    Nothing happens when the ``qlib`` logger has not been created yet.

    Parameters
    ----------
    level : int
        logging level (e.g. ``logging.INFO``) applied to each handler.
    """
    qlib_logger = logging.root.manager.loggerDict.get("qlib", None)
    # ``loggerDict`` stores a ``logging.PlaceHolder`` (which has no ``handlers``) for "qlib"
    # when only child loggers such as "qlib.data" have been created, so check the type
    # instead of only checking for ``None``.
    if isinstance(qlib_logger, logging.Logger):
        for _handler in qlib_logger.handlers:
            _handler.setLevel(level)
|
||||
|
||||
@@ -23,7 +23,10 @@ class QlibRecorder:
|
||||
@contextmanager
|
||||
def start(
|
||||
self,
|
||||
*,
|
||||
experiment_id: Optional[Text] = None,
|
||||
experiment_name: Optional[Text] = None,
|
||||
recorder_id: Optional[Text] = None,
|
||||
recorder_name: Optional[Text] = None,
|
||||
uri: Optional[Text] = None,
|
||||
resume: bool = False,
|
||||
@@ -45,8 +48,12 @@ class QlibRecorder:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
experiment_id : str
|
||||
id of the experiment one wants to start.
|
||||
experiment_name : str
|
||||
name of the experiment one wants to start.
|
||||
recorder_id : str
|
||||
id of the recorder under the experiment one wants to start.
|
||||
recorder_name : str
|
||||
name of the recorder under the experiment one wants to start.
|
||||
uri : str
|
||||
@@ -57,7 +64,14 @@ class QlibRecorder:
|
||||
resume : bool
|
||||
whether to resume the specific recorder with given name under the given experiment.
|
||||
"""
|
||||
run = self.start_exp(experiment_name, recorder_name, uri, resume)
|
||||
run = self.start_exp(
|
||||
experiment_id=experiment_id,
|
||||
experiment_name=experiment_name,
|
||||
recorder_id=recorder_id,
|
||||
recorder_name=recorder_name,
|
||||
uri=uri,
|
||||
resume=resume,
|
||||
)
|
||||
try:
|
||||
yield run
|
||||
except Exception as e:
|
||||
@@ -65,7 +79,9 @@ class QlibRecorder:
|
||||
raise e
|
||||
self.end_exp(Recorder.STATUS_FI)
|
||||
|
||||
def start_exp(self, experiment_name=None, recorder_name=None, uri=None, resume=False):
|
||||
def start_exp(
|
||||
self, *, experiment_id=None, experiment_name=None, recorder_id=None, recorder_name=None, uri=None, resume=False
|
||||
):
|
||||
"""
|
||||
Lower level method for starting an experiment. When use this method, one should end the experiment manually
|
||||
and the status of the recorder may not be handled properly. Here is the example code:
|
||||
@@ -79,8 +95,12 @@ class QlibRecorder:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
experiment_id : str
|
||||
id of the experiment one wants to start.
|
||||
experiment_name : str
|
||||
the name of the experiment to be started
|
||||
recorder_id : str
|
||||
id of the recorder under the experiment one wants to start.
|
||||
recorder_name : str
|
||||
name of the recorder under the experiment one wants to start.
|
||||
uri : str
|
||||
@@ -93,7 +113,14 @@ class QlibRecorder:
|
||||
-------
|
||||
An experiment instance being started.
|
||||
"""
|
||||
return self.exp_manager.start_exp(experiment_name, recorder_name, uri, resume)
|
||||
return self.exp_manager.start_exp(
|
||||
experiment_id=experiment_id,
|
||||
experiment_name=experiment_name,
|
||||
recorder_id=recorder_id,
|
||||
recorder_name=recorder_name,
|
||||
uri=uri,
|
||||
resume=resume,
|
||||
)
|
||||
|
||||
def end_exp(self, recorder_status=Recorder.STATUS_FI):
|
||||
"""
|
||||
|
||||
@@ -39,12 +39,14 @@ class Experiment:
|
||||
output["recorders"] = list(recorders.keys())
|
||||
return output
|
||||
|
||||
def start(self, recorder_name=None, resume=False):
|
||||
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
|
||||
"""
|
||||
Start the experiment and set it to be active. This method will also start a new recorder.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
recorder_id : str
|
||||
the id of the recorder to be created.
|
||||
recorder_name : str
|
||||
the name of the recorder to be created.
|
||||
resume : bool
|
||||
@@ -238,14 +240,14 @@ class MLflowExperiment(Experiment):
|
||||
def __repr__(self):
|
||||
return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info)
|
||||
|
||||
def start(self, recorder_name=None, resume=False):
|
||||
def start(self, *, recorder_id=None, recorder_name=None, resume=False):
|
||||
logger.info(f"Experiment {self.id} starts running ...")
|
||||
# Get or create recorder
|
||||
if recorder_name is None:
|
||||
recorder_name = self._default_rec_name
|
||||
# resume the recorder
|
||||
if resume:
|
||||
recorder, _ = self._get_or_create_rec(recorder_name=recorder_name)
|
||||
recorder, _ = self._get_or_create_rec(recorder_id=recorder_id, recorder_name=recorder_name)
|
||||
# create a new recorder
|
||||
else:
|
||||
recorder = self.create_recorder(recorder_name)
|
||||
|
||||
@@ -33,7 +33,10 @@ class ExpManager:
|
||||
|
||||
def start_exp(
|
||||
self,
|
||||
*,
|
||||
experiment_id: Optional[Text] = None,
|
||||
experiment_name: Optional[Text] = None,
|
||||
recorder_id: Optional[Text] = None,
|
||||
recorder_name: Optional[Text] = None,
|
||||
uri: Optional[Text] = None,
|
||||
resume: bool = False,
|
||||
@@ -45,8 +48,12 @@ class ExpManager:
|
||||
|
||||
Parameters
|
||||
----------
|
||||
experiment_id : str
|
||||
id of the active experiment.
|
||||
experiment_name : str
|
||||
name of the active experiment.
|
||||
recorder_id : str
|
||||
id of the recorder to be started.
|
||||
recorder_name : str
|
||||
name of the recorder to be started.
|
||||
uri : str
|
||||
@@ -298,7 +305,10 @@ class MLflowExpManager(ExpManager):
|
||||
|
||||
def start_exp(
|
||||
self,
|
||||
*,
|
||||
experiment_id: Optional[Text] = None,
|
||||
experiment_name: Optional[Text] = None,
|
||||
recorder_id: Optional[Text] = None,
|
||||
recorder_name: Optional[Text] = None,
|
||||
uri: Optional[Text] = None,
|
||||
resume: bool = False,
|
||||
@@ -308,11 +318,11 @@ class MLflowExpManager(ExpManager):
|
||||
# Create experiment
|
||||
if experiment_name is None:
|
||||
experiment_name = self._default_exp_name
|
||||
experiment, _ = self._get_or_create_exp(experiment_name=experiment_name)
|
||||
experiment, _ = self._get_or_create_exp(experiment_id=experiment_id, experiment_name=experiment_name)
|
||||
# Set up active experiment
|
||||
self.active_experiment = experiment
|
||||
# Start the experiment
|
||||
self.active_experiment.start(recorder_name, resume)
|
||||
self.active_experiment.start(recorder_id=recorder_id, recorder_name=recorder_name, resume=resume)
|
||||
|
||||
return self.active_experiment
|
||||
|
||||
|
||||
@@ -15,7 +15,11 @@
|
||||
### Download CN Data
|
||||
|
||||
```bash
|
||||
# daily data
|
||||
python get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
|
||||
|
||||
# 1min data (Optional for running non-high-frequency strategies)
|
||||
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
|
||||
```
|
||||
|
||||
### Download US Data
|
||||
|
||||
24
scripts/data_collector/contrib/README.md
Normal file
24
scripts/data_collector/contrib/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Get future trading days
|
||||
|
||||
> `D.calendar(future=True)` will be used
|
||||
|
||||
## Requirements
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Collect Data
|
||||
|
||||
```bash
|
||||
# collect future trading dates and write them to <qlib_dir>/calendars/<freq>_future.txt
|
||||
python future_trading_date_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data --freq day
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
- qlib_dir: qlib data directory
|
||||
- freq: value from [`day`, `1min`], default `day`
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import sys
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
import fire
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from loguru import logger
|
||||
|
||||
# get data from baostock
|
||||
import baostock as bs
|
||||
|
||||
CUR_DIR = Path(__file__).resolve().parent
|
||||
sys.path.append(str(CUR_DIR.parent.parent))
|
||||
|
||||
|
||||
from data_collector.utils import generate_minutes_calendar_from_daily
|
||||
|
||||
|
||||
def read_calendar_from_qlib(qlib_dir: Path) -> pd.DataFrame:
    """Load the daily calendar stored under ``qlib_dir``.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory; the calendar is read from ``calendars/day.txt`` inside it.

    Returns
    -------
    pd.DataFrame
        the header-less CSV content of the calendar file, or an empty DataFrame
        when the file does not exist.
    """
    day_file = qlib_dir / "calendars" / "day.txt"
    if day_file.exists():
        return pd.read_csv(day_file, header=None)
    return pd.DataFrame()
|
||||
|
||||
|
||||
def write_calendar_to_qlib(qlib_dir: Path, date_list: List[str], freq: str = "day"):
    """Write ``date_list`` to ``calendars/{freq}_future.txt`` under ``qlib_dir``.

    Parameters
    ----------
    qlib_dir: Path
        qlib data directory
    date_list: List[str]
        calendar entries, written one per line
    freq: str
        used as the file name prefix, by default day
    """
    target = qlib_dir / "calendars" / f"{freq}_future.txt"
    calendar_path = str(target)
    # fmt="%s" writes every entry verbatim as text
    np.savetxt(calendar_path, date_list, fmt="%s", encoding="utf-8")
    logger.info(f"write future calendars success: {calendar_path}")
|
||||
|
||||
|
||||
def generate_qlib_calendar(date_list: List[str], freq: str) -> List[str]:
    """Convert a daily date list into calendar entries of the requested frequency.

    Parameters
    ----------
    date_list: List[str]
        daily trading dates
    freq: str
        value from ["day", "1min"]

    Returns
    -------
    List[str]
        ``date_list`` itself for "day"; for "1min", the minute calendar expanded from
        the daily dates and formatted as "%Y-%m-%d %H:%M:%S".

    Raises
    ------
    ValueError
        if ``freq`` is neither "day" nor "1min".
    """
    if freq == "day":
        return date_list
    if freq == "1min":
        minutes = generate_minutes_calendar_from_daily(date_list, freq=freq).tolist()
        return [pd.Timestamp(_ts).strftime("%Y-%m-%d %H:%M:%S") for _ts in minutes]
    raise ValueError(f"Unsupported freq: {freq}")
|
||||
|
||||
|
||||
def future_calendar_collector(qlib_dir: [str, Path], freq: str = "day"):
    """get future calendar

    Queries baostock for the trading days from January 1st of the year of the last
    entry in the existing daily calendar (or of the current year when there is no
    daily calendar) to December 31st of the current year, and writes them to
    ``calendars/{freq}_future.txt`` under ``qlib_dir``.

    Parameters
    ----------
    qlib_dir: str or Path
        qlib data directory
    freq: str
        value from ["day", "1min"], by default day

    Raises
    ------
    FileNotFoundError
        if ``qlib_dir`` does not exist.
    """
    qlib_dir = Path(qlib_dir).expanduser().resolve()
    if not qlib_dir.exists():
        raise FileNotFoundError(str(qlib_dir))

    lg = bs.login()
    if lg.error_code != "0":
        logger.error(f"login error: {lg.error_msg}")
        return
    try:
        # read daily calendar
        daily_calendar = read_calendar_from_qlib(qlib_dir)
        end_year = pd.Timestamp.now().year
        if daily_calendar.empty:
            start_year = end_year
        else:
            start_year = pd.Timestamp(daily_calendar.iloc[-1, 0]).year
        # pass both bounds as "YYYY-MM-DD" strings
        rs = bs.query_trade_dates(start_date=f"{start_year}-01-01", end_date=f"{end_year}-12-31")
        data_list = []
        while rs.error_code == "0" and rs.next():
            _row_data = rs.get_row_data()
            # second column flags whether the date is a trading day
            if int(_row_data[1]) == 1:
                data_list.append(_row_data[0])
        if rs.error_code != "0":
            # do not overwrite the future calendar with incomplete data
            logger.error(f"query trade dates error: {rs.error_msg}")
            return
        date_list = generate_qlib_calendar(sorted(data_list), freq=freq)
        write_calendar_to_qlib(qlib_dir, date_list, freq=freq)
    finally:
        # always release the baostock session, even when a step above fails
        bs.logout()
    logger.info(f"get trading dates success: {start_year}-01-01 to {end_year}-12-31")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fire.Fire(future_calendar_collector)
|
||||
5
scripts/data_collector/contrib/requirements.txt
Normal file
5
scripts/data_collector/contrib/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
baostock
|
||||
fire
|
||||
numpy
|
||||
pandas
|
||||
loguru
|
||||
@@ -10,7 +10,9 @@ import random
|
||||
import requests
|
||||
import functools
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from lxml import etree
|
||||
from loguru import logger
|
||||
@@ -418,5 +420,40 @@ def get_trading_date_by_shift(trading_list: list, trading_date: pd.Timestamp, sh
|
||||
return res
|
||||
|
||||
|
||||
def generate_minutes_calendar_from_daily(
    calendars: Iterable,
    freq: str = "1min",
    am_range: Tuple[str, str] = ("09:30:00", "11:29:00"),
    pm_range: Tuple[str, str] = ("13:00:00", "14:59:00"),
) -> pd.Index:
    """generate minutes calendar

    Parameters
    ----------
    calendars: Iterable
        daily calendar
    freq: str
        by default 1min
    am_range: Tuple[str, str]
        AM Time Range, by default China-Stock: ("09:30:00", "11:29:00")
    pm_range: Tuple[str, str]
        PM Time Range, by default China-Stock: ("13:00:00", "14:59:00")

    Returns
    -------
    pd.Index
        sorted, de-duplicated timestamps covering both ranges of every day;
        an empty DatetimeIndex when ``calendars`` is empty.
    """
    daily_format: str = "%Y-%m-%d"
    res = []
    for _day in calendars:
        # format the day once and reuse it for both ends of both ranges
        _day_str = pd.Timestamp(_day).strftime(daily_format)
        for _start, _end in (am_range, pm_range):
            res.append(pd.date_range(f"{_day_str} {_start}", f"{_day_str} {_end}", freq=freq))

    if not res:
        # np.hstack raises ValueError on an empty sequence
        return pd.DatetimeIndex([])
    return pd.Index(sorted(set(np.hstack(res))))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert len(get_hs_stock_symbols()) >= MINIMUM_SYMBOLS_NUM
|
||||
|
||||
@@ -24,7 +24,12 @@ from qlib.config import REG_CN as REGION_CN
|
||||
CUR_DIR = Path(__file__).resolve().parent
|
||||
sys.path.append(str(CUR_DIR.parent.parent))
|
||||
from data_collector.base import BaseCollector, BaseNormalize, BaseRun
|
||||
from data_collector.utils import get_calendar_list, get_hs_stock_symbols, get_us_stock_symbols
|
||||
from data_collector.utils import (
|
||||
get_calendar_list,
|
||||
get_hs_stock_symbols,
|
||||
get_us_stock_symbols,
|
||||
generate_minutes_calendar_from_daily,
|
||||
)
|
||||
|
||||
INDEX_BENCH_URL = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.{index_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={begin}&end={end}"
|
||||
|
||||
@@ -418,21 +423,9 @@ class YahooNormalize1min(YahooNormalize, ABC):
|
||||
return calendar_list_1d
|
||||
|
||||
def generate_1min_from_daily(self, calendars: Iterable) -> pd.Index:
|
||||
res = []
|
||||
daily_format = self.DAILY_FORMAT
|
||||
am_range = self.AM_RANGE
|
||||
pm_range = self.PM_RANGE
|
||||
for _day in calendars:
|
||||
for _range in [am_range, pm_range]:
|
||||
res.append(
|
||||
pd.date_range(
|
||||
f"{_day.strftime(daily_format)} {_range[0]}",
|
||||
f"{_day.strftime(daily_format)} {_range[1]}",
|
||||
freq="1min",
|
||||
)
|
||||
)
|
||||
|
||||
return pd.Index(sorted(set(np.hstack(res))))
|
||||
return generate_minutes_calendar_from_daily(
|
||||
calendars, freq="1min", am_range=self.AM_RANGE, pm_range=self.PM_RANGE
|
||||
)
|
||||
|
||||
def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
# TODO: using daily data factor
|
||||
|
||||
Reference in New Issue
Block a user