Fix backtest (#719)

* modify FileStorage to support multiple freqs * modify backtest's sample documentation * change the logging level of read data exception from error to debug * fix the backtest exception when volume is 0 or np.nan * fix test_storage.py * add backtest_daily * modify backtest_daily's docstring * add __repr__/__str__ to Position * fix the bug of nested_decision_execution example Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
2026-07-05 20:11:08 +08:00 · 2021-12-07 19:04:23 +08:00
parent 84103c7d43
commit c276de4040
19 changed files with 663 additions and 232 deletions
--- a/examples/nested_decision_execution/workflow.py
+++ b/examples/nested_decision_execution/workflow.py
@@ -1,9 +1,105 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.
+"""
+The expected result of `backtest` in the current version is as follows
+
+'The following are analysis results of benchmark return(1day).'
+                       risk
+mean               0.000651
+std                0.012472
+annualized_return  0.154967
+information_ratio  0.805422
+max_drawdown      -0.160445
+'The following are analysis results of the excess return without cost(1day).'
+                       risk
+mean               0.001258
+std                0.007575
+annualized_return  0.299303
+information_ratio  2.561219
+max_drawdown      -0.068386
+'The following are analysis results of the excess return with cost(1day).'
+                       risk
+mean               0.001110
+std                0.007575
+annualized_return  0.264280
+information_ratio  2.261392
+max_drawdown      -0.071842
+[1706497:MainThread](2021-12-07 14:08:30,263) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_30minute.
+pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of benchmark return(30minute).'
+                       risk
+mean               0.000078
+std                0.003646
+annualized_return  0.148787
+information_ratio  0.935252
+max_drawdown      -0.142830
+('The following are analysis results of the excess return without '
+ 'cost(30minute).')
+                       risk
+mean               0.000174
+std                0.003343
+annualized_return  0.331867
+information_ratio  2.275019
+max_drawdown      -0.074752
+'The following are analysis results of the excess return with cost(30minute).'
+                       risk
+mean               0.000155
+std                0.003343
+annualized_return  0.294536
+information_ratio  2.018860
+max_drawdown      -0.075579
+[1706497:MainThread](2021-12-07 14:08:30,277) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_5minute.p
+kl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of benchmark return(5minute).'
+                       risk
+mean               0.000015
+std                0.001460
+annualized_return  0.172170
+information_ratio  1.103439
+max_drawdown      -0.144807
+'The following are analysis results of the excess return without cost(5minute).'
+                       risk
+mean               0.000028
+std                0.001412
+annualized_return  0.319771
+information_ratio  2.119563
+max_drawdown      -0.077426
+'The following are analysis results of the excess return with cost(5minute).'
+                       risk
+mean               0.000025
+std                0.001412
+annualized_return  0.281536
+information_ratio  1.866091
+max_drawdown      -0.078194
+[1706497:MainThread](2021-12-07 14:08:30,287) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day
+.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(1day).'
+        value
+ffr  0.945821
+pa   0.000324
+pos  0.542882
+[1706497:MainThread](2021-12-07 14:08:30,293) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_30mi
+nute.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(30minute).'
+        value
+ffr  0.982910
+pa   0.000037
+pos  0.500806
+[1706497:MainThread](2021-12-07 14:08:30,302) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_5min
+ute.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(5minute).'
+        value
+ffr  0.991017
+pa   0.000000
+pos  0.000000
+[1706497:MainThread](2021-12-07 14:08:30,627) INFO - qlib.timer - [log.py:113] - Time cost: 0.014s | waiting `async_log` Done
+"""


+from copy import deepcopy
 import qlib
 import fire
+import pandas as pd
 from qlib.config import REG_CN, HIGH_FREQ_CONFIG
 from qlib.data import D
 from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
@@ -14,6 +110,13 @@ from qlib.backtest import collect_data


 class NestedDecisionExecutionWorkflow:
+    # TODO: add test for nested workflow.
+    # 1) comparing same backtest
+    # - Basic test idea: the shared accumulated values are equal in multiple levels
+    #   - Aligning the profit calculation between multiple levels and single levels.
+    # 2) comparing different backtest
+    # - Basic test idea:
+    #   - the daily backtest will be similar to the multi-level one (the data quality makes this gap smaller)

    market = "csi300"
    benchmark = "SH000300"
@@ -167,8 +270,6 @@ class NestedDecisionExecutionWorkflow:
            par = PortAnaRecord(
                recorder,
                self.port_analysis_config,
-                risk_analysis_freq=["day", "30min", "5min"],
-                indicator_analysis_freq=["day", "30min", "5min"],
                indicator_analysis_method="value_weighted",
            )
            par.generate()
@@ -199,6 +300,93 @@ class NestedDecisionExecutionWorkflow:
        for trade_decision in data_generator:
            print(trade_decision)

+    # the code below is only for checking; users don't have to care about it
+    def check_diff_freq(self):
+        """
+        Check that the reports of different frequencies agree with each other.
+
+        The `portfolio_analysis/report_normal_{freq}.pkl` objects of the first recorder
+        listed for the "backtest" experiment are loaded for the 30minute, 5minute and
+        1day levels. For each of the "account", "total_turnover" and "total_cost"
+        columns, the values are resampled to daily frequency (keeping the last value
+        of each day, dropping days with missing values) and the 30minute series is
+        asserted to be equal to the 1day series.
+
+        Raises
+        ------
+        AssertionError
+            if the 30minute and 1day values of any checked column differ.
+        """
+        self._init_qlib()
+        exp = R.get_exp(experiment_name="backtest")
+        rec = next(iter(exp.list_recorders().values()))  # assuming this will get the latest recorder
+
+        # load every report only once; the same objects are reused for all checked columns
+        reports = {
+            freq: rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")
+            for freq in ["30minute", "5minute", "1day"]
+        }
+        for check_key in "account", "total_turnover", "total_cost":
+            acc_df = pd.DataFrame({freq: report[check_key] for freq, report in reports.items()})
+            # align the intraday levels with the daily level by taking the last value of each day
+            acc_resam = acc_df.resample("1d").last().dropna()
+            assert (
+                acc_resam["30minute"] == acc_resam["1day"]
+            ).all(), f"`{check_key}` differs between the 30minute and 1day reports"
+
+    def backtest_only_daily(self):
+        """
+        Run a single-layer daily backtest as a reference for the nested execution.
+
+        The daily-level and minute-level data are of low quality, so the results of the two
+        kinds of backtest are hardly comparable in detail.
+        This backtest is therefore only used for detecting serious bugs which make the results differ greatly.
+
+        .. code-block:: shell
+
+            [1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
+            has been saved as the artifact of the Experiment 2
+            'The following are analysis results of benchmark return(1day).'
+                                   risk
+            mean               0.000651
+            std                0.012472
+            annualized_return  0.154967
+            information_ratio  0.805422
+            max_drawdown      -0.160445
+            'The following are analysis results of the excess return without cost(1day).'
+                                   risk
+            mean               0.001375
+            std                0.006103
+            annualized_return  0.327204
+            information_ratio  3.475016
+            max_drawdown      -0.024927
+            'The following are analysis results of the excess return with cost(1day).'
+                                   risk
+            mean               0.001184
+            std                0.006091
+            annualized_return  0.281801
+            information_ratio  2.998749
+            max_drawdown      -0.029568
+            [1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
+            pkl' has been saved as the artifact of the Experiment 2
+            'The following are analysis results of indicators(1day).'
+                 value
+            ffr    1.0
+            pa     0.0
+            pos    0.0
+            [1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done
+
+        """
+        self._init_qlib()
+        model = init_instance_by_config(self.task["model"])
+        dataset = init_instance_by_config(self.task["dataset"])
+        self._train_model(model, dataset)
+
+        # work on a deep copy so that `self.port_analysis_config` itself is left untouched
+        pa_conf = deepcopy(self.port_analysis_config)
+
+        # the strategy consumes the signal of the model that has just been trained
+        topk_strategy = {
+            "class": "TopkDropoutStrategy",
+            "module_path": "qlib.contrib.strategy.signal_strategy",
+            "kwargs": {
+                "signal": (model, dataset),
+                "topk": 50,
+                "n_drop": 5,
+            },
+        }
+        # a single executor stepping by day replaces the executor of the copied config
+        daily_executor = {
+            "class": "SimulatorExecutor",
+            "module_path": "qlib.backtest.executor",
+            "kwargs": {
+                "time_per_step": "day",
+                "generate_portfolio_metrics": True,
+                "verbose": True,
+            },
+        }
+        pa_conf["strategy"] = topk_strategy
+        pa_conf["executor"] = daily_executor
+        pa_conf["backtest"]["benchmark"] = self.benchmark
+
+        with R.start(experiment_name="backtest"):
+            par = PortAnaRecord(R.get_recorder(), pa_conf)
+            par.generate()
+

 if __name__ == "__main__":
    fire.Fire(NestedDecisionExecutionWorkflow)