Add A New Baseline: DoubleEnsemble

2026-07-21 11:17:34 +08:00 · 2021-02-02 11:46:37 +09:00
parent acdc469e39
commit 8c3ec164ff
4 changed files with 29 additions and 28 deletions
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
@@ -33,7 +33,7 @@ task:
            base: "gbm"
            loss: mse
            k: 6
-            enable_sr: True
+            enable_sr: False
            enable_fs: True
            alpha1: 1
            alpha2: 1
@@ -53,6 +53,7 @@ task:
                - 0.2
                - 0.2
                - 0.2
+            epochs: 28
            colsample_bytree: 0.8879
            learning_rate: 0.2
            subsample: 0.8789
@@ -62,8 +63,6 @@ task:
            num_leaves: 210
            num_threads: 20
            verbosity: -1
-            num_iterations: 28
-            early_stopping_round: None
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
@@ -38,28 +38,29 @@ task:
        module_path: qlib.contrib.model.double_ensemble
        kwargs:
            base: "gbm"
-                loss: mse
-                k: 6
-                enable_sr: True
-                enable_fs: True
-                alpha1: 1
-                alpha2: 1
-                bins_sr: 10
-                bins_fs: 5
-                decay: 0.5
-                sample_ratios:
-                    - 0.8
-                    - 0.7
-                    - 0.6
-                    - 0.5
-                    - 0.4
-                sub_weights:
-                    - 1
-                    - 0.2
-                    - 0.2
-                    - 0.2
-                    - 0.2
-                    - 0.2
+            loss: mse
+            k: 6
+            enable_sr: True
+            enable_fs: True
+            alpha1: 1
+            alpha2: 1
+            bins_sr: 10
+            bins_fs: 5
+            decay: 0.5
+            sample_ratios:
+                - 0.8
+                - 0.7
+                - 0.6
+                - 0.5
+                - 0.4
+            sub_weights:
+                - 1
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+            epochs: 136
            colsample_bytree: 0.8879
            learning_rate: 0.0421
            subsample: 0.8789
@@ -69,8 +70,6 @@ task:
            num_leaves: 210
            num_threads: 20
            verbosity: -1
-            num_iterations: 28
-            early_stopping_round: None
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -265,7 +265,7 @@ def run(times=1, models=None, dataset="Alpha360", exclude=False):
            )  # TODO: FIX ME!
        else:
            execute(
-                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/microsoft/qlib#egg=pyqlib"
+                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/meng-ustc/qlib#egg=pyqlib"
            )  # TODO: FIX ME!
        sys.stderr.write("\n")
        # run workflow_by_config for multiple times
--- a/qlib/contrib/model/double_ensemble.py
+++ b/qlib/contrib/model/double_ensemble.py
@@ -28,6 +28,7 @@ class DEnsembleModel(Model):
            decay=None,
            sample_ratios=None,
            sub_weights=None,
+            epochs=100,
            **kwargs):
        self.base = base  # "gbm" or "mlp", specifically, we use lgbm for "gbm"
        self.k = k
@@ -44,6 +45,7 @@ class DEnsembleModel(Model):
        if not len(sub_weights) == k:
            raise ValueError("The length of sub_weights should be equal to k.")
        self.sub_weights = sub_weights
+        self.epochs = epochs
        self.logger = get_module_logger("DEnsembleModel")
        self.logger.info("Double Ensemble Model...")
        self.ensemble = []  # the current ensemble model, a list contains all the sub-models
@@ -97,6 +99,7 @@ class DEnsembleModel(Model):
        model = lgb.train(
            self.params,
            dtrain,
+            num_boost_round=self.epochs,
            valid_sets=[dtrain, dvalid],
            valid_names=["train", "valid"],
            verbose_eval=20,