1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix_DDG-DA_workflow_bug (#1516)

* 1. Specify group_keys=False in the groupby call to avoid a pandas FutureWarning;
2. Fix reading train_start from the kwargs dict: fall back to the default when the stored value is None;

* Fix black formatting

* Add comments

* Add Makefile

---------

Co-authored-by: Young <afe.young@gmail.com>
This commit is contained in:
Fivele-Li
2023-05-24 15:49:58 +08:00
committed by GitHub
parent 94268619c4
commit 370477288d
2 changed files with 9 additions and 2 deletions

View File

@@ -0,0 +1,4 @@
.PHONY: clean
clean:
-rm -r *.pkl mlruns || true

View File

@@ -116,7 +116,9 @@ class DDGDA:
feature_selected = feature_df.loc[:, col_selected.index] feature_selected = feature_df.loc[:, col_selected.index]
feature_selected = feature_selected.groupby("datetime").apply(lambda df: (df - df.mean()).div(df.std())) feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
lambda df: (df - df.mean()).div(df.std())
)
feature_selected = feature_selected.fillna(0.0) feature_selected = feature_selected.fillna(0.0)
df_all = { df_all = {
@@ -168,7 +170,8 @@ class DDGDA:
# - Only the dataset part is important, in current version of meta model will integrate the # - Only the dataset part is important, in current version of meta model will integrate the
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs) rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
sim_task = rb.basic_task() sim_task = rb.basic_task()
train_start = self.rb_kwargs.get("train_start", "2008-01-01") # the train_start for training meta model does not necessarily align with final rolling
train_start = "2008-01-01" if self.rb_kwargs.get("train_start") is None else self.rb_kwargs.get("train_start")
train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d") test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
proxy_forecast_model_task = { proxy_forecast_model_task = {