Fix CI: update examples and tests for the executor-based backtest config

2026-07-21 11:17:34 +08:00 · 2021-06-01 18:50:50 +08:00
parent 04fff8ca36
commit 4d48c96d30
6 changed files with 96 additions and 109 deletions
--- a/examples/workflow_by_code.ipynb
+++ b/examples/workflow_by_code.ipynb
@@ -196,27 +196,40 @@
    "# prediction, backtest & analysis\n",
    "###################################\n",
    "port_analysis_config = {\n",
+    "    \"executor\": {\n",
+    "        \"class\": \"SimulatorExecutor\",\n",
+    "        \"module_path\": \"qlib.backtest.executor\",\n",
+    "        \"kwargs\": {\n",
+    "            \"time_per_step\": \"day\",\n",
+    "            \"generate_report\": True,\n",
+    "        },\n",
+    "    },\n",
    "    \"strategy\": {\n",
    "        \"class\": \"TopkDropoutStrategy\",\n",
-    "        \"module_path\": \"qlib.contrib.strategy.strategy\",\n",
+    "        \"module_path\": \"qlib.contrib.strategy.model_strategy\",\n",
    "        \"kwargs\": {\n",
+    "            \"model\": model,\n",
+    "            \"dataset\": dataset,\n",
    "            \"topk\": 50,\n",
    "            \"n_drop\": 5,\n",
    "        },\n",
    "    },\n",
    "    \"backtest\": {\n",
-    "        \"verbose\": False,\n",
-    "        \"limit_threshold\": 0.095,\n",
+    "        \"start_time\": \"2017-01-01\",\n",
+    "        \"end_time\": \"2020-08-01\",\n",
    "        \"account\": 100000000,\n",
    "        \"benchmark\": benchmark,\n",
-    "        \"deal_price\": \"close\",\n",
-    "        \"open_cost\": 0.0005,\n",
-    "        \"close_cost\": 0.0015,\n",
-    "        \"min_cost\": 5,\n",
+    "        \"exchange_kwargs\": {\n",
+    "            \"freq\": \"day\",\n",
+    "            \"limit_threshold\": 0.095,\n",
+    "            \"deal_price\": \"close\",\n",
+    "            \"open_cost\": 0.0005,\n",
+    "            \"close_cost\": 0.0015,\n",
+    "            \"min_cost\": 5,\n",
+    "        },\n",
    "    },\n",
    "}\n",
    "\n",
-    "\n",
    "# backtest and analysis\n",
    "with R.start(experiment_name=\"backtest_analysis\"):\n",
    "    recorder = R.get_recorder(rid, experiment_name=\"train_model\")\n",
@@ -229,7 +242,7 @@
    "    sr.generate()\n",
    "\n",
    "    # backtest & analysis\n",
-    "    par = PortAnaRecord(recorder, port_analysis_config)\n",
+    "    par = PortAnaRecord(recorder, port_analysis_config, \"day\")\n",
    "    par.generate()\n"
   ]
  },
@@ -249,11 +262,12 @@
    "from qlib.contrib.report import analysis_model, analysis_position\n",
    "from qlib.data import D\n",
    "recorder = R.get_recorder(ba_rid, experiment_name=\"backtest_analysis\")\n",
+    "print(recorder)\n",
    "pred_df = recorder.load_object(\"pred.pkl\")\n",
    "pred_df_dates = pred_df.index.get_level_values(level='datetime')\n",
-    "report_normal_df = recorder.load_object(\"portfolio_analysis/report_normal.pkl\")\n",
-    "positions = recorder.load_object(\"portfolio_analysis/positions_normal.pkl\")\n",
-    "analysis_df = recorder.load_object(\"portfolio_analysis/port_analysis.pkl\")"
+    "report_normal_df = recorder.load_object(\"portfolio_analysis/report_normal_1day.pkl\")\n",
+    "positions = recorder.load_object(\"portfolio_analysis/positions_normal_1day.pkl\")\n",
+    "analysis_df = recorder.load_object(\"portfolio_analysis/port_analysis_1day.pkl\")"
   ]
  },
  {
@@ -348,9 +362,8 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
+   "name": "pythonjvsc74a57bd0fcc004278713aaede7c629a6a43738a929cb09abb52817d4f72eb70db44cd87b",
+   "display_name": "Python 3.8  ('qlib_backtest': conda)"
  },
  "language_info": {
   "codemirror_mode": {
@@ -362,7 +375,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.8"
  },
  "toc": {
   "base_numbering": 1,
@@ -376,6 +389,11 @@
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "fcc004278713aaede7c629a6a43738a929cb09abb52817d4f72eb70db44cd87b"
+   }
  }
 },
 "nbformat": 4,
--- a/examples/workflow_by_code.py
+++ b/examples/workflow_by_code.py
@@ -3,10 +3,12 @@

 import qlib
 from qlib.config import REG_CN
-from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
+from qlib.utils import init_instance_by_config, flatten_dict
 from qlib.workflow import R
 from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
 from qlib.tests.data import GetData
+from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
+

 if __name__ == "__main__":

@@ -15,57 +17,8 @@ if __name__ == "__main__":
    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    qlib.init(provider_uri=provider_uri, region=REG_CN)

-    market = "csi300"
-    benchmark = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    data_handler_config = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": market,
-    }
-
-    task = {
-        "model": {
-            "class": "LGBModel",
-            "module_path": "qlib.contrib.model.gbdt",
-            "kwargs": {
-                "loss": "mse",
-                "colsample_bytree": 0.8879,
-                "learning_rate": 0.0421,
-                "subsample": 0.8789,
-                "lambda_l1": 205.6999,
-                "lambda_l2": 580.9768,
-                "max_depth": 8,
-                "num_leaves": 210,
-                "num_threads": 20,
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "Alpha158",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": data_handler_config,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        },
-    }
-
-    # model initialization
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
+    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
+    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])

    port_analysis_config = {
        "executor": {
@@ -90,7 +43,7 @@ if __name__ == "__main__":
            "start_time": "2017-01-01",
            "end_time": "2020-08-01",
            "account": 100000000,
-            "benchmark": benchmark,
+            "benchmark": CSI300_BENCH,
            "exchange_kwargs": {
                "freq": "day",
                "limit_threshold": 0.095,
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -118,7 +118,8 @@ class BaseExecutor:
    def get_report(self):
        raise NotImplementedError("get_report is not implemented!")

-    def get_all_executor(self):
+    def get_all_executors(self):
+        """Return all executors"""
        return [self]


@@ -247,8 +248,9 @@ class NestedExecutor(BaseExecutor):
            sub_env_report_dict.update({f"{_count}{_freq}": (_report, _positions)})
        return sub_env_report_dict

-    def get_all_executor(self):
-        return [self, *self.inner_executor.get_all_executor()]
+    def get_all_executors(self):
+        """Return all executors, including self and inner_executor.get_all_executors()"""
+        return [self, *self.inner_executor.get_all_executors()]


 class SimulatorExecutor(BaseExecutor):
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -12,6 +12,7 @@ from pandas.core.frame import DataFrame

 from ..utils.resam import parse_freq, resam_ts_data
 from ..data import D
+from ..tests.config import CSI300_BENCH


 class Report:
@@ -67,7 +68,7 @@ class Report:
        self.bench = self._cal_benchmark(self.benchmark_config, self.freq)

    def _cal_benchmark(self, benchmark_config, freq):
-        benchmark = benchmark_config.get("benchmark", "SH000300")
+        benchmark = benchmark_config.get("benchmark", CSI300_BENCH)
        if isinstance(benchmark, pd.Series):
            return benchmark
        else:
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -29,7 +29,7 @@ def risk_analysis(r, N: int = None, freq: str = "day"):
    r : pandas.Series
        daily return series.
    N: int
-        scaler for annualizing information_ratio (day: 250, week: 50, month: 12), at least one of `N` and `freq` should exist
+        scaler for annualizing information_ratio (day: 252, week: 50, month: 12), at least one of `N` and `freq` should exist
    freq: str
        analysis frequency used for calculating the scaler, at least one of `N` and `freq` should exist
    """
--- a/tests/test_all_pipeline.py
+++ b/tests/test_all_pipeline.py
@@ -14,27 +14,6 @@ from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord
 from qlib.tests import TestAutoData
 from qlib.tests.config import CSI300_GBDT_TASK, CSI300_BENCH

-port_analysis_config = {
-    "strategy": {
-        "class": "TopkDropoutStrategy",
-        "module_path": "qlib.contrib.strategy.strategy",
-        "kwargs": {
-            "topk": 50,
-            "n_drop": 5,
-        },
-    },
-    "backtest": {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": CSI300_BENCH,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    },
-}
-

 def train():
    """train model
@@ -58,7 +37,7 @@ def train():
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
-
+        R.save_objects(trained_model=model)
        # prediction
        recorder = R.get_recorder()
        # To test __repr__
@@ -68,7 +47,6 @@ def train():
        rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()
-        pred_score = sr.load()

        # calculate ic and ric
        sar = SigAnaRecord(recorder)
@@ -76,7 +54,7 @@ def train():
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))

-    return pred_score, {"ic": ic, "ric": ric}, rid
+    return {"ic": ic, "ric": ric}, rid


 def train_with_sigana():
@@ -103,10 +81,9 @@ def train_with_sigana():
        sar.generate()
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))
-        pred_score = sar.load("pred.pkl")

        uri_path = R.get_uri()
-    return pred_score, {"ic": ic, "ric": ric}, uri_path
+    return {"ic": ic, "ric": ric}, uri_path


 def fake_experiment():
@@ -130,13 +107,11 @@ def fake_experiment():
    return default_uri == default_uri_to_check, current_uri == current_uri_to_check, current_uri


-def backtest_analysis(pred, rid):
+def backtest_analysis(rid):
    """backtest and analysis

    Parameters
    ----------
-    pred : pandas.DataFrame
-        predict scores
    rid : str
        the id of the recorder to be used in this function

@@ -147,16 +122,54 @@ def backtest_analysis(pred, rid):

    """
    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
+
+    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
+    model = recorder.load_object("trained_model")
+
+    port_analysis_config = {
+        "executor": {
+            "class": "SimulatorExecutor",
+            "module_path": "qlib.backtest.executor",
+            "kwargs": {
+                "time_per_step": "day",
+                "generate_report": True,
+            },
+        },
+        "strategy": {
+            "class": "TopkDropoutStrategy",
+            "module_path": "qlib.contrib.strategy.model_strategy",
+            "kwargs": {
+                "model": model,
+                "dataset": dataset,
+                "topk": 50,
+                "n_drop": 5,
+            },
+        },
+        "backtest": {
+            "start_time": "2017-01-01",
+            "end_time": "2020-08-01",
+            "account": 100000000,
+            "benchmark": CSI300_BENCH,
+            "exchange_kwargs": {
+                "freq": "day",
+                "limit_threshold": 0.095,
+                "deal_price": "close",
+                "open_cost": 0.0005,
+                "close_cost": 0.0015,
+                "min_cost": 5,
+            },
+        },
+    }
+
    # backtest
-    par = PortAnaRecord(recorder, port_analysis_config)
+    par = PortAnaRecord(recorder, port_analysis_config, risk_analysis_freq="day")
    par.generate()
-    analysis_df = par.load(par.get_path("port_analysis.pkl"))
+    analysis_df = par.load(par.get_path("port_analysis_1day.pkl"))
    print(analysis_df)
    return analysis_df


 class TestAllFlow(TestAutoData):
-    PRED_SCORE = None
    REPORT_NORMAL = None
    POSITIONS = None
    RID = None
@@ -166,18 +179,18 @@ class TestAllFlow(TestAutoData):
        shutil.rmtree(str(Path(C["exp_manager"]["kwargs"]["uri"].strip("file:")).resolve()))

    def test_0_train_with_sigana(self):
-        TestAllFlow.PRED_SCORE, ic_ric, uri_path = train_with_sigana()
+        ic_ric, uri_path = train_with_sigana()
        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")
        shutil.rmtree(str(Path(uri_path.strip("file:")).resolve()))

    def test_1_train(self):
-        TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train()
+        ic_ric, TestAllFlow.RID = train()
        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")

    def test_2_backtest(self):
-        analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
+        analyze_df = backtest_analysis(TestAllFlow.RID)
        self.assertGreaterEqual(
            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
            0.10,