From cd4ab998fbba39cff507694bc6159fb68f11b1d5 Mon Sep 17 00:00:00 2001 From: Wendi Li Date: Sat, 3 Jun 2023 08:42:24 +0800 Subject: [PATCH] Update on Dynamic Benchmark (#1539) * move config file to benchmark_dynamic & switch default sim task model to GBDT * Update benchmark_dynamic results * Change the default value of alpha of DDG-DA --- .../benchmarks_dynamic/DDG-DA/workflow.py | 6 +- examples/benchmarks_dynamic/README.md | 12 +-- .../baseline/rolling_benchmark.py | 5 +- .../workflow_config_lightgbm_Alpha158.yaml | 72 +++++++++++++++++ .../workflow_config_linear_Alpha158.yaml | 79 +++++++++++++++++++ 5 files changed, 164 insertions(+), 10 deletions(-) create mode 100644 examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml create mode 100644 examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml diff --git a/examples/benchmarks_dynamic/DDG-DA/workflow.py b/examples/benchmarks_dynamic/DDG-DA/workflow.py index f57080055..fef86726d 100644 --- a/examples/benchmarks_dynamic/DDG-DA/workflow.py +++ b/examples/benchmarks_dynamic/DDG-DA/workflow.py @@ -34,14 +34,14 @@ class DDGDA: def __init__( self, - sim_task_model: Literal["linear", "gbdt"] = "linear", + sim_task_model: Literal["linear", "gbdt"] = "gbdt", forecast_model: Literal["linear", "gbdt"] = "linear", h_path: Optional[str] = None, test_end: Optional[str] = None, train_start: Optional[str] = None, meta_1st_train_end: Optional[str] = None, task_ext_conf: Optional[dict] = None, - alpha: float = 0.0, + alpha: float = 0.01, proxy_hd: str = "handler_proxy.pkl", ): """ @@ -215,7 +215,7 @@ class DDGDA: with R.start(experiment_name=self.meta_exp_name): R.log_params(**kwargs) mm = MetaModelDS( - step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=100, seed=43, alpha=self.alpha + step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha ) mm.fit(md) R.save_objects(model=mm) diff --git 
a/examples/benchmarks_dynamic/README.md b/examples/benchmarks_dynamic/README.md index 261fcc035..6f78fa71a 100644 --- a/examples/benchmarks_dynamic/README.md +++ b/examples/benchmarks_dynamic/README.md @@ -8,15 +8,17 @@ The table below shows the performances of different solutions on different forec Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases ```bash wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz +mkdir -p ~/.qlib/qlib_data/cn_data tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 +rm -f qlib_bin.tar.gz ``` | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | -|------------------|---------|----|------|---------|-----------|-------------------|-------------------|--------------| -| RR[Linear] |Alpha158 |0.089|0.577|0.102 |0.627 |0.093 |1.458 |-0.073 | -| DDG-DA[Linear] |Alpha158 |0.096|0.636|0.107 |0.677 |0.067 |0.996 |-0.091 | -| RR[LightGBM] |Alpha158 |0.082|0.589|0.091 |0.626 |0.077 |1.320 |-0.091 | -| DDG-DA[LightGBM] |Alpha158 |0.085|0.658|0.094 |0.686 |0.115 |1.792 |-0.068 | +|------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------| +| RR[Linear] |Alpha158 |0.0945|0.5989|0.1069 |0.6495 |0.0857 |1.3682 |-0.0986 | +| DDG-DA[Linear] |Alpha158 |0.0983|0.6157|0.1108 |0.6646 |0.0764 |1.1904 |-0.0769 | +| RR[LightGBM] |Alpha158 |0.0816|0.5887|0.0912 |0.6263 |0.0771 |1.3196 |-0.0909 | +| DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975 |0.6524 |0.1261 |2.0096 |-0.0744 | - The label horizon of the `Alpha158` dataset is set to 20. - The rolling time intervals are set to 20 trading days. 
diff --git a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py index d452957d4..b0c7aea4f 100644 --- a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py +++ b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py @@ -67,11 +67,12 @@ class RollingBenchmark: def basic_task(self): """For fast training rolling""" if self.model_type == "gbdt": - conf_path = DIRNAME.parent.parent / "benchmarks" / "LightGBM" / "workflow_config_lightgbm_Alpha158.yaml" + conf_path = DIRNAME / "workflow_config_lightgbm_Alpha158.yaml" # dump the processed data on to disk for later loading to speed up the processing h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon) elif self.model_type == "linear": - conf_path = DIRNAME.parent.parent / "benchmarks" / "Linear" / "workflow_config_linear_Alpha158.yaml" + # We use ridge regression to stabilize the performance + conf_path = DIRNAME / "workflow_config_linear_Alpha158.yaml" h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon) else: raise AssertionError("Model type is not supported!") diff --git a/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml b/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml new file mode 100644 index 000000000..2d441dea9 --- /dev/null +++ b/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml @@ -0,0 +1,72 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + model: <MODEL> + dataset: <DATASET> + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 +
end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: <MODEL> + dataset: <DATASET> + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml b/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml new file mode 100644 index 000000000..78ec4e612 --- /dev/null +++ b/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml @@ -0,0 +1,79 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - class: RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label +port_analysis_config: &port_analysis_config + strategy: + class: 
TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + - <MODEL> + - <DATASET> + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LinearModel + module_path: qlib.contrib.model.linear + kwargs: + estimator: ridge + alpha: 0.05 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: <MODEL> + dataset: <DATASET> + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: True + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config