add infra interface & fix no KeyboardInterrupt bug

2026-07-22 11:47:35 +08:00 · 2021-05-31 20:40:11 +08:00
parent bf3b757294
commit 60e082e446
10 changed files with 120 additions and 72 deletions
--- a/examples/nested_decision_execution/README.md
+++ b/examples/nested_decision_execution/README.md
@@ -0,0 +1,30 @@
+# Nested Decision Execution
+
+This workflow is an example of nested decision execution in backtesting. Qlib supports nested decision execution in backtesting, which means that users can apply different strategies to make trade decisions at different frequencies.
+
+## Weekly Portfolio Generation and Daily Order Execution
+
+This workflow provides an example that uses a TopkDropoutStrategy (a strategy based on a daily-frequency LightGBM model) at weekly frequency for portfolio generation and uses SBBStrategyEMA (a rule-based strategy that uses EMA for decision-making) to execute orders at daily frequency.
+
+### Usage
+
+Start backtesting by running the following command:
+```bash
+    python workflow.py backtest
+```
+
+Start collecting data by running the following command:
+```bash
+    python workflow.py collect_data
+```
+
+## Daily Portfolio Generation and Minutely Order Execution
+
+This workflow also provides a high-frequency example that uses a TopkDropoutStrategy for portfolio generation at daily frequency and uses SBBStrategyEMA to execute orders at minutely frequency.
+
+### Usage
+
+Start backtesting by running the following command:
+```bash
+    python workflow.py backtest_highfreq
+```
--- a/examples/nested_decision_execution/workflow.py
+++ b/examples/nested_decision_execution/workflow.py
@@ -0,0 +1,262 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+
+import qlib
+import fire
+from qlib.config import REG_CN, HIGH_FREQ_CONFIG
+from qlib.data import D
+from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
+from qlib.workflow import R
+from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
+from qlib.tests.data import GetData
+from qlib.backtest import collect_data
+
+
+class NestedDecisonExecutionWorkflow:
+    """Example workflows for nested decision execution in Qlib backtesting.
+
+    ``port_analysis_config`` is a class-level template: workflow methods only
+    modify deep copies of it, so one run never alters another run's config.
+    """
+
+    market = "csi300"
+    benchmark = "SH000300"
+
+    data_handler_config = {
+        "start_time": "2008-01-01",
+        "end_time": "2021-01-20",
+        "fit_start_time": "2008-01-01",
+        "fit_end_time": "2014-12-31",
+        "instruments": market,
+    }
+
+    task = {
+        "model": {
+            "class": "LGBModel",
+            "module_path": "qlib.contrib.model.gbdt",
+            "kwargs": {
+                "loss": "mse",
+                "colsample_bytree": 0.8879,
+                "learning_rate": 0.0421,
+                "subsample": 0.8789,
+                "lambda_l1": 205.6999,
+                "lambda_l2": 580.9768,
+                "max_depth": 8,
+                "num_leaves": 210,
+                "num_threads": 20,
+            },
+        },
+        "dataset": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                "handler": {
+                    "class": "Alpha158",
+                    "module_path": "qlib.contrib.data.handler",
+                    "kwargs": data_handler_config,
+                },
+                "segments": {
+                    "train": ("2008-01-01", "2014-12-31"),
+                    "valid": ("2015-01-01", "2016-12-31"),
+                    "test": ("2017-01-01", "2021-01-20"),
+                },
+            },
+        },
+    }
+
+    port_analysis_config = {
+        "executor": {
+            "class": "NestedExecutor",
+            "module_path": "qlib.backtest.executor",
+            "kwargs": {
+                "time_per_step": "week",
+                "inner_executor": {
+                    "class": "SimulatorExecutor",
+                    "module_path": "qlib.backtest.executor",
+                    "kwargs": {
+                        "time_per_step": "day",
+                        "verbose": True,
+                        "generate_report": True,
+                    },
+                },
+                "inner_strategy": {
+                    "class": "SBBStrategyEMA",
+                    "module_path": "qlib.contrib.strategy.rule_strategy",
+                    "kwargs": {
+                        "freq": "day",
+                        "instruments": market,
+                    },
+                },
+                "generate_report": True,
+                "track_data": True,
+            },
+        },
+        "backtest": {
+            "start_time": "2017-01-01",
+            "end_time": "2020-08-01",
+            "account": 100000000,
+            "benchmark": benchmark,
+            "exchange_kwargs": {
+                "freq": "day",
+                "limit_threshold": 0.095,
+                "deal_price": "close",
+                "open_cost": 0.0005,
+                "close_cost": 0.0015,
+                "min_cost": 5,
+            },
+        },
+    }
+
+    def _init_qlib(self):
+        """Initialize qlib with the daily CN data, downloading it if missing."""
+        provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+        if not exists_qlib_data(provider_uri):
+            print(f"Qlib data is not found in {provider_uri}")
+            GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+        qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+    def _train_model(self, model, dataset):
+        """Fit ``model`` on ``dataset`` inside the "train" experiment and record it."""
+        with R.start(experiment_name="train"):
+            R.log_params(**flatten_dict(self.task))
+            model.fit(dataset)
+            R.save_objects(**{"params.pkl": model})
+            # prediction
+            recorder = R.get_recorder()
+            sr = SignalRecord(model, dataset, recorder)
+            sr.generate()
+
+    def _prepare_model_and_dataset(self):
+        """Build the model and dataset from ``self.task`` and train the model."""
+        model = init_instance_by_config(self.task["model"])
+        dataset = init_instance_by_config(self.task["dataset"])
+        self._train_model(model, dataset)
+        return model, dataset
+
+    @staticmethod
+    def _strategy_config(model, dataset):
+        """Return the ``TopkDropoutStrategy`` config shared by all workflows."""
+        return {
+            "class": "TopkDropoutStrategy",
+            "module_path": "qlib.contrib.strategy.model_strategy",
+            "kwargs": {
+                "model": model,
+                "dataset": dataset,
+                "topk": 50,
+                "n_drop": 5,
+            },
+        }
+
+    def _copy_port_analysis_config(self):
+        """Return a deep copy of the shared ``port_analysis_config`` template."""
+        import copy  # local import keeps the file's import block untouched
+        return copy.deepcopy(self.port_analysis_config)
+
+    def backtest(self):
+        """Run the backtest with weekly outer steps and daily inner steps."""
+        self._init_qlib()
+        model, dataset = self._prepare_model_and_dataset()
+        port_analysis_config = self._copy_port_analysis_config()
+        port_analysis_config["strategy"] = self._strategy_config(model, dataset)
+        with R.start(experiment_name="backtest"):
+            recorder = R.get_recorder()
+            par = PortAnaRecord(recorder, port_analysis_config, "day")
+            par.generate()
+
+    def collect_data(self):
+        """Print every item yielded by ``qlib.backtest.collect_data`` for this setup."""
+        self._init_qlib()
+        model, dataset = self._prepare_model_and_dataset()
+        port_analysis_config = self._copy_port_analysis_config()
+        data_generator = collect_data(
+            executor=port_analysis_config["executor"],
+            strategy=self._strategy_config(model, dataset),
+            **port_analysis_config["backtest"],
+        )
+        for trade_decision in data_generator:
+            print(trade_decision)
+
+    def _init_qlib_with_backend(self):
+        """Initialize qlib with both 1min and daily data via file-storage backends."""
+        provider_uri_1min = HIGH_FREQ_CONFIG.get("provider_uri")
+        if not exists_qlib_data(provider_uri_1min):
+            print(f"Qlib data is not found in {provider_uri_1min}")
+            GetData().qlib_data(target_dir=provider_uri_1min, interval="1min", region=REG_CN)
+
+        # TODO: update latest data
+        provider_uri_day = "~/.qlib/qlib_data/cn_data"  # target_dir
+        if not exists_qlib_data(provider_uri_day):
+            print(f"Qlib data is not found in {provider_uri_day}")
+            GetData().qlib_data(target_dir=provider_uri_day, region=REG_CN)
+
+        provider_uri_map = {"1min": provider_uri_1min, "day": provider_uri_day}
+        client_config = {
+            "calendar_provider": {
+                "class": "LocalCalendarProvider",
+                "module_path": "qlib.data.data",
+                "kwargs": {
+                    "backend": {
+                        "class": "FileCalendarStorage",
+                        "module_path": "qlib.data.storage.file_storage",
+                        "kwargs": {"provider_uri_map": provider_uri_map},
+                    }
+                },
+            },
+            "feature_provider": {
+                "class": "LocalFeatureProvider",
+                "module_path": "qlib.data.data",
+                "kwargs": {
+                    "backend": {
+                        "class": "FileFeatureStorage",
+                        "module_path": "qlib.data.storage.file_storage",
+                        "kwargs": {"provider_uri_map": provider_uri_map},
+                    }
+                },
+            },
+        }
+        qlib.init(provider_uri=provider_uri_day, **client_config)
+
+    def _get_highfreq_config(self, model, dataset):
+        """Return ``(executor, strategy, backtest)`` configs for the high-freq run."""
+        # Derive from a deep copy so the class-level template stays untouched.
+        port_analysis_config = self._copy_port_analysis_config()
+
+        # outer decisions are made per day, inner execution steps are 15min
+        executor_config = port_analysis_config["executor"]
+        executor_config["kwargs"]["time_per_step"] = "day"
+        executor_config["kwargs"]["inner_executor"]["kwargs"]["time_per_step"] = "15min"
+
+        # yahoo highfreq data time
+        backtest_config = port_analysis_config["backtest"]
+        backtest_config["start_time"] = "2020-09-20"
+        backtest_config["end_time"] = "2021-01-20"
+
+        # update benchmark, yahoo data don't have SH000300
+        instruments = D.instruments(market="csi300")
+        instrument_list = D.list_instruments(instruments=instruments, as_list=True)
+        backtest_config["benchmark"] = instrument_list
+
+        # update exchange config
+        backtest_config["exchange_kwargs"]["freq"] = "1min"
+
+        return executor_config, self._strategy_config(model, dataset), backtest_config
+
+    def backtest_highfreq(self):
+        """Run the backtest with daily outer steps and 15min inner steps on 1min data."""
+        self._init_qlib_with_backend()
+        model, dataset = self._prepare_model_and_dataset()
+        executor_config, strategy_config, backtest_config = self._get_highfreq_config(model, dataset)
+        highfreq_port_analysis_config = {
+            "executor": executor_config,
+            "strategy": strategy_config,
+            "backtest": backtest_config,
+        }
+        with R.start(experiment_name="backtest_highfreq"):
+            recorder = R.get_recorder()
+            par = PortAnaRecord(recorder, highfreq_port_analysis_config, "day")
+            par.generate()
+
+
+if __name__ == "__main__":
+    fire.Fire(NestedDecisonExecutionWorkflow)  # CLI entry point, e.g. `python workflow.py backtest`