mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
@@ -350,7 +350,7 @@ class TSDataSampler:
|
||||
flt_data = flt_data.reindex(self.data_index).fillna(False).astype(np.bool)
|
||||
self.flt_data = flt_data.values
|
||||
self.idx_map = self.flt_idx_map(self.flt_data, self.idx_map)
|
||||
self.data_index = self.data_index[np.where(self.flt_data is True)[0]]
|
||||
self.data_index = self.data_index[np.where(self.flt_data)[0]]
|
||||
self.idx_map = self.idx_map2arr(self.idx_map)
|
||||
|
||||
self.start_idx, self.end_idx = self.data_index.slice_locs(
|
||||
|
||||
@@ -187,7 +187,13 @@ class Fillna(Processor):
|
||||
df.fillna(self.fill_value, inplace=True)
|
||||
else:
|
||||
cols = get_group_columns(df, self.fields_group)
|
||||
df.fillna({col: self.fill_value for col in cols}, inplace=True)
|
||||
# The commented-out dict-based fillna below is extremely slow:
|
||||
# df.fillna({col: self.fill_value for col in cols}, inplace=True)
|
||||
|
||||
# So we use NumPy boolean masking to fill the values faster
|
||||
nan_select = np.isnan(df.values)
|
||||
nan_select[:, ~df.columns.isin(cols)] = False
|
||||
df.values[nan_select] = self.fill_value
|
||||
return df
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user