Refine DDG-DA (#1472)

* Run ddg-da successfully * Support include valid; More parameters * Support L2 reg & visualization * Blackformat * Enable fill_method * Support specify handler & optim dataset * Fix Pylint
2026-07-22 03:37:34 +08:00 · 2023-04-07 15:00:21 +08:00
parent 40de67265a
commit 32c3070b73
17 changed files with 457 additions and 39 deletions
--- a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
+++ b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
@@ -1,13 +1,17 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+from typing import Optional
 from qlib.model.ens.ensemble import RollingEnsemble
 from qlib.utils import init_instance_by_config
 import fire
 import yaml
+import pandas as pd
 from qlib import auto_init
 from pathlib import Path
 from tqdm.auto import tqdm
 from qlib.model.trainer import TrainerR
+from qlib.log import get_module_logger
+from qlib.utils.data import update_config
 from qlib.workflow import R
 from qlib.tests.data import GetData

@@ -25,11 +29,40 @@ class RollingBenchmark:

    """

-    def __init__(self, rolling_exp="rolling_models", model_type="linear") -> None:
+    def __init__(
+        self,
+        rolling_exp: str = "rolling_models",
+        model_type: str = "linear",
+        h_path: Optional[str] = None,
+        train_start: Optional[str] = None,
+        test_end: Optional[str] = None,
+        task_ext_conf: Optional[dict] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        rolling_exp : str
+            The name for the experiments for rolling
+        model_type : str
+            The model to be boosted.
+        h_path : Optional[str]
+            the dumped data handler;
+        test_end : Optional[str]
+            the test end for the data. It is typically used together with the handler
+        train_start : Optional[str]
+            the train start for the data.  It is typically used together with the handler.
+        task_ext_conf : Optional[dict]
+            some option to update the
+        """
        self.step = 20
        self.horizon = 20
        self.rolling_exp = rolling_exp
        self.model_type = model_type
+        self.h_path = h_path
+        self.train_start = train_start
+        self.test_end = test_end
+        self.logger = get_module_logger("RollingBenchmark")
+        self.task_ext_conf = task_ext_conf

    def basic_task(self):
        """For fast training rolling"""
@@ -42,6 +75,10 @@ class RollingBenchmark:
            h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
        else:
            raise AssertionError("Model type is not supported!")
+
+        if self.h_path is not None:
+            h_path = Path(self.h_path)
+
        with conf_path.open("r") as f:
            conf = yaml.safe_load(f)

@@ -52,6 +89,9 @@ class RollingBenchmark:

        task = conf["task"]

+        if self.task_ext_conf is not None:
+            task = update_config(task, self.task_ext_conf)
+
        if not h_path.exists():
            h_conf = task["dataset"]["kwargs"]["handler"]
            h = init_instance_by_config(h_conf)
@@ -59,6 +99,15 @@ class RollingBenchmark:

        task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
        task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
+
+        if self.train_start is not None:
+            seg = task["dataset"]["kwargs"]["segments"]["train"]
+            task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
+
+        if self.test_end is not None:
+            seg = task["dataset"]["kwargs"]["segments"]["test"]
+            task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
+        self.logger.info(task)
        return task

    def create_rolling_tasks(self):
@@ -93,7 +142,7 @@ class RollingBenchmark:
        """
        Evaluate the combined rolling results
        """
-        for rid, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
+        for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
            for rt_cls in SigAnaRecord, PortAnaRecord:
                rt = rt_cls(recorder=rec, skip_existing=True)
                rt.generate()