1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-05 20:11:08 +08:00

Fix backtest (#719)

* modify FileStorage to support multiple freqs

* modify backtest's sample documentation

* change the logging level of read data exception from error to debug

* fix the backtest exception when volume is 0 or np.nan

* fix test_storage.py

* add backtest_daily

* modify backtest_daily's docstring

* add __repr__/__str__ to Position

* fix the bug of nested_decision_execution example

Co-authored-by: Young <afe.young@gmail.com>
Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
This commit is contained in:
Pengrong Zhu
2021-12-07 19:04:23 +08:00
committed by GitHub
parent 84103c7d43
commit c276de4040
19 changed files with 663 additions and 232 deletions

View File

@@ -1,9 +1,105 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
The expect result of `backtest` is following in current version
'The following are analysis results of benchmark return(1day).'
risk
mean 0.000651
std 0.012472
annualized_return 0.154967
information_ratio 0.805422
max_drawdown -0.160445
'The following are analysis results of the excess return without cost(1day).'
risk
mean 0.001258
std 0.007575
annualized_return 0.299303
information_ratio 2.561219
max_drawdown -0.068386
'The following are analysis results of the excess return with cost(1day).'
risk
mean 0.001110
std 0.007575
annualized_return 0.264280
information_ratio 2.261392
max_drawdown -0.071842
[1706497:MainThread](2021-12-07 14:08:30,263) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_30minute.
pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of benchmark return(30minute).'
risk
mean 0.000078
std 0.003646
annualized_return 0.148787
information_ratio 0.935252
max_drawdown -0.142830
('The following are analysis results of the excess return without '
'cost(30minute).')
risk
mean 0.000174
std 0.003343
annualized_return 0.331867
information_ratio 2.275019
max_drawdown -0.074752
'The following are analysis results of the excess return with cost(30minute).'
risk
mean 0.000155
std 0.003343
annualized_return 0.294536
information_ratio 2.018860
max_drawdown -0.075579
[1706497:MainThread](2021-12-07 14:08:30,277) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_5minute.p
kl' has been saved as the artifact of the Experiment 2
'The following are analysis results of benchmark return(5minute).'
risk
mean 0.000015
std 0.001460
annualized_return 0.172170
information_ratio 1.103439
max_drawdown -0.144807
'The following are analysis results of the excess return without cost(5minute).'
risk
mean 0.000028
std 0.001412
annualized_return 0.319771
information_ratio 2.119563
max_drawdown -0.077426
'The following are analysis results of the excess return with cost(5minute).'
risk
mean 0.000025
std 0.001412
annualized_return 0.281536
information_ratio 1.866091
max_drawdown -0.078194
[1706497:MainThread](2021-12-07 14:08:30,287) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day
.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(1day).'
value
ffr 0.945821
pa 0.000324
pos 0.542882
[1706497:MainThread](2021-12-07 14:08:30,293) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_30mi
nute.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(30minute).'
value
ffr 0.982910
pa 0.000037
pos 0.500806
[1706497:MainThread](2021-12-07 14:08:30,302) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_5min
ute.pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(5minute).'
value
ffr 0.991017
pa 0.000000
pos 0.000000
[1706497:MainThread](2021-12-07 14:08:30,627) INFO - qlib.timer - [log.py:113] - Time cost: 0.014s | waiting `async_log` Done
"""
from copy import deepcopy
import qlib
import fire
import pandas as pd
from qlib.config import REG_CN, HIGH_FREQ_CONFIG
from qlib.data import D
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
@@ -14,6 +110,13 @@ from qlib.backtest import collect_data
class NestedDecisionExecutionWorkflow:
# TODO: add test for nested workflow.
# 1) comparing same backtest
# - Basic test idea: the shared accumulated value are equal in multiple levels
# - Aligning the profit calculation between multiple levels and single levels.
# 2) comparing different backtest
# - Basic test idea:
# - the daily backtest will be similar as multi-level(the data quality makes this gap samller)
market = "csi300"
benchmark = "SH000300"
@@ -167,8 +270,6 @@ class NestedDecisionExecutionWorkflow:
par = PortAnaRecord(
recorder,
self.port_analysis_config,
risk_analysis_freq=["day", "30min", "5min"],
indicator_analysis_freq=["day", "30min", "5min"],
indicator_analysis_method="value_weighted",
)
par.generate()
@@ -199,6 +300,93 @@ class NestedDecisionExecutionWorkflow:
for trade_decision in data_generator:
print(trade_decision)
# the code below are for checking, users don't have to care about it
def check_diff_freq(self):
self._init_qlib()
exp = R.get_exp(experiment_name="backtest")
rec = next(iter(exp.list_recorders().values())) # assuming this will get the latest recorder
for check_key in "account", "total_turnover", "total_cost":
check_key = "total_cost"
acc_dict = {}
for freq in ["30minute", "5minute", "1day"]:
acc_dict[freq] = rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")[check_key]
acc_df = pd.DataFrame(acc_dict)
acc_resam = acc_df.resample("1d").last().dropna()
assert (acc_resam["30minute"] == acc_resam["1day"]).all()
def backtest_only_daily(self):
"""
This backtest is used for comparing the nested execution and single layer execution
Due to the low quality daily-level and miniute-level data, they are hardly comparable.
So it is used for detecting serious bugs which make the results different greatly.
.. code-block:: shell
[1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
has been saved as the artifact of the Experiment 2
'The following are analysis results of benchmark return(1day).'
risk
mean 0.000651
std 0.012472
annualized_return 0.154967
information_ratio 0.805422
max_drawdown -0.160445
'The following are analysis results of the excess return without cost(1day).'
risk
mean 0.001375
std 0.006103
annualized_return 0.327204
information_ratio 3.475016
max_drawdown -0.024927
'The following are analysis results of the excess return with cost(1day).'
risk
mean 0.001184
std 0.006091
annualized_return 0.281801
information_ratio 2.998749
max_drawdown -0.029568
[1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
pkl' has been saved as the artifact of the Experiment 2
'The following are analysis results of indicators(1day).'
value
ffr 1.0
pa 0.0
pos 0.0
[1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done
"""
self._init_qlib()
model = init_instance_by_config(self.task["model"])
dataset = init_instance_by_config(self.task["dataset"])
self._train_model(model, dataset)
strategy_config = {
"class": "TopkDropoutStrategy",
"module_path": "qlib.contrib.strategy.signal_strategy",
"kwargs": {
"signal": (model, dataset),
"topk": 50,
"n_drop": 5,
},
}
pa_conf = deepcopy(self.port_analysis_config)
pa_conf["strategy"] = strategy_config
pa_conf["executor"] = {
"class": "SimulatorExecutor",
"module_path": "qlib.backtest.executor",
"kwargs": {
"time_per_step": "day",
"generate_portfolio_metrics": True,
"verbose": True,
},
}
pa_conf["backtest"]["benchmark"] = self.benchmark
with R.start(experiment_name="backtest"):
recorder = R.get_recorder()
par = PortAnaRecord(recorder, pa_conf)
par.generate()
if __name__ == "__main__":
fire.Fire(NestedDecisionExecutionWorkflow)