diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index ce052f550..e69dad53a 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from typing import Optional +from qlib.utils.data import update_config from ...data.dataset.handler import DataHandlerLP from ...data.dataset.processor import Processor from ...utils import get_callable_kwargs @@ -57,12 +59,13 @@ class Alpha360(DataHandlerLP): fit_end_time=None, filter_pipe=None, inst_processors=None, + data_loader: Optional[dict]=None, **kwargs ): infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) - data_loader = { + _data_loader = { "class": "QlibDataLoader", "kwargs": { "config": { @@ -74,12 +77,14 @@ class Alpha360(DataHandlerLP): "inst_processors": inst_processors, }, } + if data_loader is not None: + update_config(_data_loader, data_loader) super().__init__( instruments=instruments, start_time=start_time, end_time=end_time, - data_loader=data_loader, + data_loader=_data_loader, learn_processors=learn_processors, infer_processors=infer_processors, **kwargs @@ -153,12 +158,13 @@ class Alpha158(DataHandlerLP): process_type=DataHandlerLP.PTYPE_A, filter_pipe=None, inst_processors=None, + data_loader: Optional[dict]=None, **kwargs ): infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) - data_loader = { + _data_loader = { "class": "QlibDataLoader", "kwargs": { "config": { @@ -170,11 +176,13 @@ class Alpha158(DataHandlerLP): "inst_processors": inst_processors, }, } + if data_loader is not None: + update_config(_data_loader, data_loader) super().__init__( instruments=instruments, start_time=start_time, end_time=end_time, - 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from pathlib import Path

# Absolute, symlink-resolved location of this module; the template folders sit next to it.
_THIS_FILE = Path(__file__).absolute().resolve()
DIRNAME = _THIS_FILE.parent


def get_tpl_path() -> Path:
    """
    Return the directory that holds the project templates.

    The templates are shipped alongside this module and the install location of the
    package is not known in advance, so the path is derived from this module's
    ``__file__``.
    """
    return DIRNAME
30)/$close', 'Min($low, 60)/$close', 'Quantile($close, 5, 0.8)/$close', 'Quantile($close, 10, 0.8)/$close', 'Quantile($close, 20, 0.8)/$close', 'Quantile($close, 30, 0.8)/$close', 'Quantile($close, 60, 0.8)/$close', 'Quantile($close, 5, 0.2)/$close', 'Quantile($close, 10, 0.2)/$close', 'Quantile($close, 20, 0.2)/$close', 'Quantile($close, 30, 0.2)/$close', 'Quantile($close, 60, 0.2)/$close', 'Rank($close, 5)', 'Rank($close, 10)', 'Rank($close, 20)', 'Rank($close, 30)', 'Rank($close, 60)', '($close-Min($low, 5))/(Max($high, 5)-Min($low, 5)+1e-12)', '($close-Min($low, 10))/(Max($high, 10)-Min($low, 10)+1e-12)', '($close-Min($low, 20))/(Max($high, 20)-Min($low, 20)+1e-12)', '($close-Min($low, 30))/(Max($high, 30)-Min($low, 30)+1e-12)', '($close-Min($low, 60))/(Max($high, 60)-Min($low, 60)+1e-12)', 'IdxMax($high, 5)/5', 'IdxMax($high, 10)/10', 'IdxMax($high, 20)/20', 'IdxMax($high, 30)/30', 'IdxMax($high, 60)/60', 'IdxMin($low, 5)/5', 'IdxMin($low, 10)/10', 'IdxMin($low, 20)/20', 'IdxMin($low, 30)/30', 'IdxMin($low, 60)/60', '(IdxMax($high, 5)-IdxMin($low, 5))/5', '(IdxMax($high, 10)-IdxMin($low, 10))/10', '(IdxMax($high, 20)-IdxMin($low, 20))/20', '(IdxMax($high, 30)-IdxMin($low, 30))/30', '(IdxMax($high, 60)-IdxMin($low, 60))/60', 'Corr($close, Log($volume+1), 5)', 'Corr($close, Log($volume+1), 10)', 'Corr($close, Log($volume+1), 20)', 'Corr($close, Log($volume+1), 30)', 'Corr($close, Log($volume+1), 60)', 'Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)', 'Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)', 'Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 20)', 'Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 30)', 'Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)', 'Mean($close>Ref($close, 1), 5)', 'Mean($close>Ref($close, 1), 10)', 'Mean($close>Ref($close, 1), 20)', 'Mean($close>Ref($close, 1), 30)', 'Mean($close>Ref($close, 1), 60)', 'Mean($closeRef($close, 1), 5)-Mean($closeRef($close, 1), 
10)-Mean($closeRef($close, 1), 20)-Mean($closeRef($close, 1), 30)-Mean($closeRef($close, 1), 60)-Mean($close + dataset: + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: + dataset: + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/qlib/finco/tpls/sl/workflow_config.yaml b/qlib/finco/tpl/sl/workflow_config.yaml similarity index 100% rename from qlib/finco/tpls/sl/workflow_config.yaml rename to qlib/finco/tpl/sl/workflow_config.yaml diff --git a/qlib/finco/tpls/README.md b/qlib/finco/tpls/README.md deleted file mode 100644 index b7b74547a..000000000 --- a/qlib/finco/tpls/README.md +++ /dev/null @@ -1,6 +0,0 @@ -This is a set of templates that should be copied for a new project. 
def parse_json(response):
    """
    Parse ``response`` as a JSON document.

    Parameters
    ----------
    response : str
        Text expected to contain a JSON document (anything ``json.loads`` accepts).

    Returns
    -------
    object
        The Python value produced by ``json.loads``.

    Raises
    ------
    Exception
        If ``response`` is not valid JSON. The underlying ``json.JSONDecodeError`` is
        attached as ``__cause__`` so the position of the syntax error is not lost.
    """
    try:
        return json.loads(response)
    except json.JSONDecodeError as err:
        # Keep the exception type and message unchanged for existing callers, but chain the
        # decode error instead of discarding it.
        raise Exception(
            f"Failed to parse response: {response}, please report it or help us to fix it."
        ) from err
import unittest

from ruamel.yaml import YAML

from qlib.data.dataset.handler import DataHandlerLP
from qlib.finco.tpl import get_tpl_path
from qlib.tests import TestAutoData
from qlib.utils import init_instance_by_config


class FincoTpl(TestAutoData):
    """Checks that the shipped finco workflow templates agree with each other."""

    @staticmethod
    def _load_handler(config_path) -> DataHandlerLP:
        """Build the data handler described by the workflow config file at ``config_path``."""
        # The module-level ``ruamel.yaml.safe_load`` shortcut is deprecated and raises in
        # ruamel.yaml >= 0.18, so load through a safe-mode ``YAML`` instance instead.
        yaml = YAML(typ="safe", pure=True)
        with config_path.open("rb") as fp:
            config = yaml.load(fp)
        return init_instance_by_config(config["task"]["dataset"]["kwargs"]["handler"])

    def test_tpl_consistence(self):
        """Motivation: make sure the configurable template is consistent with the default config"""
        tpl_p = get_tpl_path()
        hd = self._load_handler(tpl_p / "sl" / "workflow_config.yaml")
        # NOTE: The feature list in workflow_config_ds.yaml was generated from
        # `hd.data_loader.fields` by dumping it in YAML format to a file without automatic
        # line breaks, using the legacy module-level ruamel API:
        #   yaml.dump(hd.data_loader.fields, width=10000, stream=open("_tmp", "w"))
        hd_ds = self._load_handler(tpl_p / "sl-cfg" / "workflow_config_ds.yaml")
        self.assertEqual(hd_ds.data_loader.fields, hd.data_loader.fields)

        # Missing values are filled first because NaN never compares equal to NaN.
        check = hd_ds.fetch().fillna(0.0) == hd.fetch().fillna(0.0)
        self.assertTrue(check.all().all())


if __name__ == "__main__":
    unittest.main()