From 4d48c96d30c82fec6450b78df25f33e2e63cfa62 Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 1 Jun 2021 18:50:50 +0800 Subject: [PATCH] fix CI --- examples/workflow_by_code.ipynb | 50 +++++++++++++------- examples/workflow_by_code.py | 59 +++-------------------- qlib/backtest/executor.py | 8 ++-- qlib/backtest/report.py | 3 +- qlib/contrib/evaluate.py | 2 +- tests/test_all_pipeline.py | 83 +++++++++++++++++++-------------- 6 files changed, 96 insertions(+), 109 deletions(-) diff --git a/examples/workflow_by_code.ipynb b/examples/workflow_by_code.ipynb index b4da1bfe4..3d99bf1e1 100644 --- a/examples/workflow_by_code.ipynb +++ b/examples/workflow_by_code.ipynb @@ -196,27 +196,40 @@ "# prediction, backtest & analysis\n", "###################################\n", "port_analysis_config = {\n", + " \"executor\": {\n", + " \"class\": \"SimulatorExecutor\",\n", + " \"module_path\": \"qlib.backtest.executor\",\n", + " \"kwargs\": {\n", + " \"time_per_step\": \"day\",\n", + " \"generate_report\": True,\n", + " },\n", + " },\n", " \"strategy\": {\n", " \"class\": \"TopkDropoutStrategy\",\n", - " \"module_path\": \"qlib.contrib.strategy.strategy\",\n", + " \"module_path\": \"qlib.contrib.strategy.model_strategy\",\n", " \"kwargs\": {\n", + " \"model\": model,\n", + " \"dataset\": dataset,\n", " \"topk\": 50,\n", " \"n_drop\": 5,\n", " },\n", " },\n", " \"backtest\": {\n", - " \"verbose\": False,\n", - " \"limit_threshold\": 0.095,\n", + " \"start_time\": \"2017-01-01\",\n", + " \"end_time\": \"2020-08-01\",\n", " \"account\": 100000000,\n", " \"benchmark\": benchmark,\n", - " \"deal_price\": \"close\",\n", - " \"open_cost\": 0.0005,\n", - " \"close_cost\": 0.0015,\n", - " \"min_cost\": 5,\n", + " \"exchange_kwargs\": {\n", + " \"freq\": \"day\",\n", + " \"limit_threshold\": 0.095,\n", + " \"deal_price\": \"close\",\n", + " \"open_cost\": 0.0005,\n", + " \"close_cost\": 0.0015,\n", + " \"min_cost\": 5,\n", + " },\n", " },\n", "}\n", "\n", - "\n", "# backtest and analysis\n", "with R.start(experiment_name=\"backtest_analysis\"):\n", " recorder = R.get_recorder(rid, experiment_name=\"train_model\")\n", @@ -229,7 +242,7 @@ " sr.generate()\n", "\n", " # backtest & analysis\n", - " par = PortAnaRecord(recorder, port_analysis_config)\n", + " par = PortAnaRecord(recorder, port_analysis_config, \"day\")\n", " par.generate()\n" ] }, @@ -249,11 +262,12 @@ "from qlib.contrib.report import analysis_model, analysis_position\n", "from qlib.data import D\n", "recorder = R.get_recorder(ba_rid, experiment_name=\"backtest_analysis\")\n", + "print(recorder)\n", "pred_df = recorder.load_object(\"pred.pkl\")\n", "pred_df_dates = pred_df.index.get_level_values(level='datetime')\n", - "report_normal_df = recorder.load_object(\"portfolio_analysis/report_normal.pkl\")\n", - "positions = recorder.load_object(\"portfolio_analysis/positions_normal.pkl\")\n", - "analysis_df = recorder.load_object(\"portfolio_analysis/port_analysis.pkl\")" + "report_normal_df = recorder.load_object(\"portfolio_analysis/report_normal_1day.pkl\")\n", + "positions = recorder.load_object(\"portfolio_analysis/positions_normal_1day.pkl\")\n", + "analysis_df = recorder.load_object(\"portfolio_analysis/port_analysis_1day.pkl\")" ] }, { @@ -348,9 +362,8 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "pythonjvsc74a57bd0fcc004278713aaede7c629a6a43738a929cb09abb52817d4f72eb70db44cd87b", + "display_name": "Python 3.8 ('qlib_backtest': conda)" }, "language_info": { "codemirror_mode": { @@ -362,7 +375,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8" }, "toc": { "base_numbering": 1, @@ -376,6 +389,11 @@ "toc_position": {}, "toc_section_display": true, "toc_window_display": false + }, + "metadata": { + "interpreter": { + "hash": "fcc004278713aaede7c629a6a43738a929cb09abb52817d4f72eb70db44cd87b" + } } }, "nbformat": 4, diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index b02ea91b1..d7bb544f9 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -3,10 +3,12 @@ import qlib from qlib.config import REG_CN -from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.utils import init_instance_by_config, flatten_dict from qlib.workflow import R from qlib.workflow.record_temp import SignalRecord, PortAnaRecord from qlib.tests.data import GetData +from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK + if __name__ == "__main__": @@ -15,57 +17,8 @@ if __name__ == "__main__": GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) qlib.init(provider_uri=provider_uri, region=REG_CN) - market = "csi300" - benchmark = "SH000300" - - ################################### - # train model - ################################### - data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, - } - - task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, - } - - # model initialization - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) port_analysis_config = { "executor": { @@ -90,7 +43,7 @@ if __name__ == "__main__": "start_time": "2017-01-01", "end_time": "2020-08-01", "account": 100000000, - "benchmark": benchmark, + "benchmark": CSI300_BENCH, "exchange_kwargs": { "freq": "day", "limit_threshold": 0.095, diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index e68047e38..656073759 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -118,7 +118,8 @@ class BaseExecutor: def get_report(self): raise NotImplementedError("get_report is not implemented!") - def get_all_executor(self): + def get_all_executors(self): + """Return all executors""" return [self] @@ -247,8 +248,9 @@ class NestedExecutor(BaseExecutor): sub_env_report_dict.update({f"{_count}{_freq}": (_report, _positions)}) return sub_env_report_dict - def get_all_executor(self): - return [self, *self.inner_executor.get_all_executor()] + def get_all_executors(self): + """Return all executors, including self and inner_executor.get_all_executors()""" + return [self, *self.inner_executor.get_all_executors()] class SimulatorExecutor(BaseExecutor): diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 4b9b0ce26..0668f81cf 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -12,6 +12,7 @@ from pandas.core.frame import DataFrame from ..utils.resam import parse_freq, resam_ts_data from ..data import D +from ..tests.config import CSI300_BENCH class Report: @@ -67,7 +68,7 @@ class Report: self.bench = self._cal_benchmark(self.benchmark_config, self.freq) def _cal_benchmark(self, benchmark_config, freq): - benchmark = benchmark_config.get("benchmark", "SH000300") + benchmark = benchmark_config.get("benchmark", CSI300_BENCH) if isinstance(benchmark, pd.Series): return benchmark else: diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index 8d4052cdb..0ef8f95a5 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -29,7 +29,7 @@ def risk_analysis(r, N: int = None, freq: str = "day"): r : pandas.Series daily return series. N: int - scaler for annualizing information_ratio (day: 250, week: 50, month: 12), at least one of `N` and `freq` should exist + scaler for annualizing information_ratio (day: 252, week: 50, month: 12), at least one of `N` and `freq` should exist freq: str analysis frequency used for calculating the scaler, at least one of `N` and `freq` should exist """ diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index 4c20405fa..ea171f31e 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -14,27 +14,6 @@ from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord from qlib.tests import TestAutoData from qlib.tests.config import CSI300_GBDT_TASK, CSI300_BENCH -port_analysis_config = { - "strategy": { - "class": "TopkDropoutStrategy", - "module_path": "qlib.contrib.strategy.strategy", - "kwargs": { - "topk": 50, - "n_drop": 5, - }, - }, - "backtest": { - "verbose": False, - "limit_threshold": 0.095, - "account": 100000000, - "benchmark": CSI300_BENCH, - "deal_price": "close", - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5, - }, -} - def train(): """train model @@ -58,7 +37,7 @@ def train(): with R.start(experiment_name="workflow"): R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) - + R.save_objects(trained_model=model) # prediction recorder = R.get_recorder() # To test __repr__ @@ -68,7 +47,6 @@ def train(): rid = recorder.id sr = SignalRecord(model, dataset, recorder) sr.generate() - pred_score = sr.load() # calculate ic and ric sar = SigAnaRecord(recorder) @@ -76,7 +54,7 @@ def train(): ic = sar.load(sar.get_path("ic.pkl")) ric = sar.load(sar.get_path("ric.pkl")) - return pred_score, {"ic": ic, "ric": ric}, rid + return {"ic": ic, "ric": ric}, rid def train_with_sigana(): @@ -103,10 +81,9 @@ def train_with_sigana(): sar.generate() ic = sar.load(sar.get_path("ic.pkl")) ric = sar.load(sar.get_path("ric.pkl")) - pred_score = sar.load("pred.pkl") uri_path = R.get_uri() - return pred_score, {"ic": ic, "ric": ric}, uri_path + return {"ic": ic, "ric": ric}, uri_path def fake_experiment(): @@ -130,13 +107,11 @@ def fake_experiment(): return default_uri == default_uri_to_check, current_uri == current_uri_to_check, current_uri -def backtest_analysis(pred, rid): +def backtest_analysis(rid): """backtest and analysis Parameters ---------- - pred : pandas.DataFrame - predict scores rid : str the id of the recorder to be used in this function @@ -147,16 +122,54 @@ def backtest_analysis(pred, rid): """ recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid) + + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) + model = recorder.load_object("trained_model") + + port_analysis_config = { + "executor": { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": "day", + "generate_report": True, + }, + }, + "strategy": { + "class": "TopkDropoutStrategy", + "module_path": "qlib.contrib.strategy.model_strategy", + "kwargs": { + "model": model, + "dataset": dataset, + "topk": 50, + "n_drop": 5, + }, + }, + "backtest": { + "start_time": "2017-01-01", + "end_time": "2020-08-01", + "account": 100000000, + "benchmark": CSI300_BENCH, + "exchange_kwargs": { + "freq": "day", + "limit_threshold": 0.095, + "deal_price": "close", + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5, + }, + }, + } + # backtest - par = PortAnaRecord(recorder, port_analysis_config) + par = PortAnaRecord(recorder, port_analysis_config, risk_analysis_freq="day") par.generate() - analysis_df = par.load(par.get_path("port_analysis.pkl")) + analysis_df = par.load(par.get_path("port_analysis_1day.pkl")) print(analysis_df) return analysis_df class TestAllFlow(TestAutoData): - PRED_SCORE = None REPORT_NORMAL = None POSITIONS = None RID = None @@ -166,18 +179,18 @@ class TestAllFlow(TestAutoData): shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve())) def test_0_train_with_sigana(self): - TestAllFlow.PRED_SCORE, ic_ric, uri_path = train_with_sigana() + ic_ric, uri_path = train_with_sigana() self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed") self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") shutil.rmtree(str(Path(uri_path.strip("file:")).resolve())) def test_1_train(self): - TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train() + ic_ric, TestAllFlow.RID = train() self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed") self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed") def test_2_backtest(self): - analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID) + analyze_df = backtest_analysis(TestAllFlow.RID) self.assertGreaterEqual( analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10,