class FilterCol(Processor):
    """Keep only whitelisted columns of one field group.

    Columns are matched by their last-level name. A column survives when its
    name is listed in ``col_list`` or when its name does not occur among the
    columns returned by ``get_group_columns(df, fields_group)``; every other
    column is dropped.

    Parameters
    ----------
    fields_group : str
        Group passed to ``get_group_columns`` to select the columns that are
        subject to filtering.
        NOTE(review): presumably the top-level column label such as
        "feature"/"label" -- confirm against ``get_group_columns``.
    col_list : list-like, optional
        Last-level column names to keep inside ``fields_group``. Defaults to
        an empty whitelist, i.e. every column of the group is removed.
    """

    def __init__(self, fields_group="feature", col_list=None):
        self.fields_group = fields_group
        # A fresh list per instance instead of a shared mutable default.
        self.col_list = [] if col_list is None else col_list

    def __call__(self, df):
        """Return ``df`` restricted to the columns that pass the filter.

        The processor's configuration is left untouched, so repeated calls
        (possibly on frames with different columns) behave identically.
        """
        group_cols = get_group_columns(df, self.fields_group)

        # ``get_level_values(-1)`` is valid for both a flat Index and a
        # MultiIndex, so a single code path covers both column layouts.
        names = df.columns.get_level_values(-1)

        # Names that never appear inside the group are always kept.
        outside_group = np.setdiff1d(names, group_cols.get_level_values(-1))

        # Build the effective whitelist locally: writing it back to
        # ``self.col_list`` would make the whitelist grow with every call and
        # leak column names from one frame into the next.
        keep = np.union1d(outside_group, self.col_list)

        mask = names.isin(keep)
        return df.loc[:, mask]