1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Add A New Baseline: DoubleEnsemble

This commit is contained in:
meng-ustc
2021-02-02 11:46:37 +09:00
parent acdc469e39
commit 8c3ec164ff
4 changed files with 29 additions and 28 deletions

View File

@@ -33,7 +33,7 @@ task:
base: "gbm"
loss: mse
k: 6
enable_sr: True
enable_sr: False
enable_fs: True
alpha1: 1
alpha2: 1
@@ -53,6 +53,7 @@ task:
- 0.2
- 0.2
- 0.2
epochs: 28
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
@@ -62,8 +63,6 @@ task:
num_leaves: 210
num_threads: 20
verbosity: -1
num_iterations: 28
early_stopping_round: None
dataset:
class: DatasetH
module_path: qlib.data.dataset

View File

@@ -38,28 +38,29 @@ task:
module_path: qlib.contrib.model.double_ensemble
kwargs:
base: "gbm"
loss: mse
k: 6
enable_sr: True
enable_fs: True
alpha1: 1
alpha2: 1
bins_sr: 10
bins_fs: 5
decay: 0.5
sample_ratios:
- 0.8
- 0.7
- 0.6
- 0.5
- 0.4
sub_weights:
- 1
- 0.2
- 0.2
- 0.2
- 0.2
- 0.2
loss: mse
k: 6
enable_sr: True
enable_fs: True
alpha1: 1
alpha2: 1
bins_sr: 10
bins_fs: 5
decay: 0.5
sample_ratios:
- 0.8
- 0.7
- 0.6
- 0.5
- 0.4
sub_weights:
- 1
- 0.2
- 0.2
- 0.2
- 0.2
- 0.2
epochs: 136
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
@@ -69,8 +70,6 @@ task:
num_leaves: 210
num_threads: 20
verbosity: -1
num_iterations: 28
early_stopping_round: None
dataset:
class: DatasetH
module_path: qlib.data.dataset

View File

@@ -265,7 +265,7 @@ def run(times=1, models=None, dataset="Alpha360", exclude=False):
) # TODO: FIX ME!
else:
execute(
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/microsoft/qlib#egg=pyqlib"
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/meng-ustc/qlib#egg=pyqlib"
) # TODO: FIX ME!
sys.stderr.write("\n")
# run workflow_by_config for multiple times

View File

@@ -28,6 +28,7 @@ class DEnsembleModel(Model):
decay=None,
sample_ratios=None,
sub_weights=None,
epochs=100,
**kwargs):
self.base = base # "gbm" or "mlp"; for "gbm" we specifically use LightGBM (lgbm)
self.k = k
@@ -44,6 +45,7 @@ class DEnsembleModel(Model):
if not len(sub_weights) == k:
raise ValueError("The length of sub_weights should be equal to k.")
self.sub_weights = sub_weights
self.epochs = epochs
self.logger = get_module_logger("DEnsembleModel")
self.logger.info("Double Ensemble Model...")
self.ensemble = [] # the current ensemble model: a list containing all the sub-models
@@ -97,6 +99,7 @@ class DEnsembleModel(Model):
model = lgb.train(
self.params,
dtrain,
num_boost_round=self.epochs,
valid_sets=[dtrain, dvalid],
valid_names=["train", "valid"],
verbose_eval=20,