1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fast fillna (#1074)

* fast fillna

* fix TSDataSampler bug
This commit is contained in:
you-n-g
2022-04-24 23:24:32 +08:00
committed by GitHub
parent caea495f40
commit cd5e5d5235
2 changed files with 8 additions and 2 deletions

View File

@@ -350,7 +350,7 @@ class TSDataSampler:
flt_data = flt_data.reindex(self.data_index).fillna(False).astype(np.bool)
self.flt_data = flt_data.values
self.idx_map = self.flt_idx_map(self.flt_data, self.idx_map)
- self.data_index = self.data_index[np.where(self.flt_data is True)[0]]
+ self.data_index = self.data_index[np.where(self.flt_data)[0]]
self.idx_map = self.idx_map2arr(self.idx_map)
self.start_idx, self.end_idx = self.data_index.slice_locs(

View File

@@ -187,7 +187,13 @@ class Fillna(Processor):
df.fillna(self.fill_value, inplace=True)
else:
cols = get_group_columns(df, self.fields_group)
- df.fillna({col: self.fill_value for col in cols}, inplace=True)
+ # this implementation is extremely slow
+ # df.fillna({col: self.fill_value for col in cols}, inplace=True)
+ # So we use numpy to accelerate filling values
+ nan_select = np.isnan(df.values)
+ nan_select[:, ~df.columns.isin(cols)] = False
+ df.values[nan_select] = self.fill_value
return df