mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-05 03:50:57 +08:00
add docs & fix reinit of datatset
This commit is contained in:
28
examples/highfreq/README.md
Normal file
28
examples/highfreq/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# High-Frequency Dataset
|
||||
|
||||
This dataset is an example for RL high frequency trading.
|
||||
|
||||
## Get High-Frequency Data
|
||||
|
||||
Get high-frequency data by running the following command:
|
||||
```bash
|
||||
python workflow.py get_data
|
||||
```
|
||||
|
||||
## Dump & Reload & Reinitialize the Dataset
|
||||
|
||||
|
||||
The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in `workflow.py`. `DatasetH` is a subclass of `qlib.utils.serial.Serializable`, which supports being dumped to or loaded from disk in `pickle` format.
|
||||
|
||||
### About Reinitialization
|
||||
|
||||
After reloading `Dataset` from disk, `Qlib` also supports reinitializing the dataset. This means that users can reset some of the config of `Dataset` or `DataHandler`, such as `instruments`, `start_time`, `end_time` and `segments`.
|
||||
|
||||
An example is given in `workflow.py`; users can run the code as follows.
|
||||
|
||||
### Run the Code
|
||||
|
||||
Run the example by running the following command:
|
||||
```bash
|
||||
python workflow.py dump_and_load_dataset
|
||||
```
|
||||
@@ -9,7 +9,7 @@ import qlib
|
||||
import pickle
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from qlib.config import HIGH_FREQ_CONFIG
|
||||
from qlib.config import REG_CN, HIGH_FREQ_CONFIG
|
||||
from qlib.contrib.model.gbdt import LGBModel
|
||||
from qlib.contrib.data.handler import Alpha158
|
||||
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
|
||||
@@ -26,7 +26,6 @@ from qlib.tests.data import GetData
|
||||
|
||||
from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Select, IsNull
|
||||
|
||||
|
||||
class HighfreqWorkflow(object):
|
||||
|
||||
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull], "expression_cache": None}
|
||||
@@ -123,8 +122,7 @@ class HighfreqWorkflow(object):
|
||||
backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
|
||||
print(backtest_train, backtest_test)
|
||||
|
||||
del xtrain, xtest
|
||||
del backtest_train, backtest_test
|
||||
return
|
||||
|
||||
def dump_and_load_dataset(self):
|
||||
"""dump and load dataset state on disk"""
|
||||
@@ -146,18 +144,39 @@ class HighfreqWorkflow(object):
|
||||
dataset_backtest = pickle.load(file_dataset_backtest)
|
||||
|
||||
self._prepare_calender_cache()
|
||||
##=============reload_dataset=============
|
||||
dataset.init(init_type=DataHandlerLP.IT_LS)
|
||||
dataset_backtest.init()
|
||||
##=============reinit dataset=============
|
||||
dataset.init(
|
||||
handler_kwargs = {
|
||||
"init_type" : DataHandlerLP.IT_LS,
|
||||
"start_time" : "2021-01-19 00:00:00",
|
||||
"end_time" : "2021-01-25 16:00:00",
|
||||
},
|
||||
segment_kwargs = {
|
||||
"test": (
|
||||
"2021-01-19 00:00:00",
|
||||
"2021-01-25 16:00:00",
|
||||
),
|
||||
}
|
||||
)
|
||||
dataset_backtest.init(
|
||||
handler_kwargs = {
|
||||
"start_time" : "2021-01-19 00:00:00",
|
||||
"end_time" : "2021-01-25 16:00:00",
|
||||
},
|
||||
segment_kwargs = {
|
||||
"test": (
|
||||
"2021-01-19 00:00:00",
|
||||
"2021-01-25 16:00:00",
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
##=============get data=============
|
||||
xtrain, xtest = dataset.prepare(["train", "test"])
|
||||
backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
|
||||
xtest = dataset.prepare(["test"])
|
||||
backtest_test = dataset_backtest.prepare(["test"])
|
||||
|
||||
print(xtrain, xtest)
|
||||
print(backtest_train, backtest_test)
|
||||
del xtrain, xtest
|
||||
del backtest_train, backtest_test
|
||||
print(xtest, backtest_test)
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -87,9 +87,42 @@ class DatasetH(Dataset):
|
||||
"""
|
||||
super().__init__(handler, segments)
|
||||
|
||||
def init(self, **kwargs):
|
||||
"""Initialize the DatasetH, Only parameters belonging to handler.init will be passed in"""
|
||||
self.handler.init(**kwargs)
|
||||
def init(self, handler_kwargs: dict = None, segment_kwargs: dict = None):
    """
    Initialize the DatasetH, optionally reconfiguring its handler and segments.

    Parameters
    ----------
    handler_kwargs : dict
        Config of DataHandler, which could include the following arguments:

        - arguments of DataHandler.conf_data, such as 'instruments', 'start_time' and 'end_time'.

        - arguments of DataHandler.init, such as 'enable_cache', etc.

        When it is None or empty, the handler is left untouched.

    segment_kwargs : dict
        Config of segments which is same as 'segments' in DatasetH.setup_data.
        When it is None or empty, the current segments are kept.

    Raises
    ------
    TypeError
        If a non-empty `handler_kwargs` or `segment_kwargs` is not a dict.
    """
    if handler_kwargs:
        if not isinstance(handler_kwargs, dict):
            raise TypeError(f"param handler_kwargs must be type dict, not {type(handler_kwargs)}")

        # Keys accepted by handler.conf_data; every other key is forwarded to handler.init.
        conf_data_arg = {"instruments", "start_time", "end_time"}
        kwargs_conf_data = {k: v for k, v in handler_kwargs.items() if k in conf_data_arg}
        kwargs_init = {k: v for k, v in handler_kwargs.items() if k not in conf_data_arg}

        # conf_data is applied before init, so init runs against the updated data config.
        self.handler.conf_data(**kwargs_conf_data)
        self.handler.init(**kwargs_init)

    if segment_kwargs:
        if not isinstance(segment_kwargs, dict):
            # Name the parameter that actually failed the check.
            raise TypeError(f"param segment_kwargs must be type dict, not {type(segment_kwargs)}")
        # Store a shallow copy so later mutation of the caller's dict does not leak in.
        self.segments = segment_kwargs.copy()
|
||||
|
||||
def setup_data(self, handler: Union[dict, DataHandler], segments: dict):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user