mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Fixed a problem with MultiIndex results caused by the default value of `group_keys` (#1917)
* fixed a problem with multi-index caused by the default value of `group_keys`
* modify `group_keys` default value
* limit pandas version
* format with black
* fix docs error
* fix docs error
* fixed bugs caused by pandas upgrade
* remove needless code
* reformat with black
* limit version & add docs
This commit is contained in:
@@ -599,7 +599,7 @@ class TemporalFusionTransformer:
|
||||
print("Getting valid sampling locations.")
|
||||
valid_sampling_locations = []
|
||||
split_data_map = {}
|
||||
for identifier, df in data.groupby(id_col):
|
||||
for identifier, df in data.groupby(id_col, group_keys=False):
|
||||
print("Getting locations for {}".format(identifier))
|
||||
num_entries = len(df)
|
||||
if num_entries >= self.time_steps:
|
||||
@@ -678,7 +678,7 @@ class TemporalFusionTransformer:
|
||||
input_cols = [tup[0] for tup in self.column_definition if tup[2] not in {InputTypes.ID, InputTypes.TIME}]
|
||||
|
||||
data_map = {}
|
||||
for _, sliced in data.groupby(id_col):
|
||||
for _, sliced in data.groupby(id_col, group_keys=False):
|
||||
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
|
||||
|
||||
for k in col_mappings:
|
||||
|
||||
@@ -78,13 +78,15 @@ DATASET_SETTING = {
|
||||
|
||||
|
||||
def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"):
    """Shift one column by ``shifts`` rows independently within each instrument.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame containing ``col_shift``. Only that column is selected before
        grouping, so "instrument" must be the name of an index level.
    shifts : int
        Number of rows passed to ``DataFrame.shift`` for every group.
    col_shift : str
        Name of the column to shift.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (``col_shift``) holding the shifted values.
    """
    # group_keys=False stops pandas from prepending the "instrument" group key
    # as an extra index level, so the result keeps the same index levels as
    # ``data_df`` and can be aligned back onto it.
    return data_df[[col_shift]].groupby("instrument", group_keys=False).apply(lambda df: df.shift(shifts))
|
||||
|
||||
|
||||
def fill_test_na(test_df):
    """Return a copy of ``test_df`` with feature NaNs filled by the per-datetime mean.

    Columns whose name contains "label" (case-insensitive) are left untouched;
    every other column is treated as a feature. The input frame is not modified.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Frame with flat string column names, groupable by "datetime".

    Returns
    -------
    pandas.DataFrame
        Copy of ``test_df`` in which each missing feature value is replaced by
        the mean of that column over the rows sharing the same "datetime".
    """
    test_df_res = test_df.copy()
    # Boolean mask over columns: True for everything that is not a label.
    feature_cols = ~test_df_res.columns.str.contains("label", case=False)
    # group_keys=False keeps the filled frame on the original index, so the
    # aligned assignment below writes every row back where it came from.
    test_feature_fna = (
        test_df_res.loc[:, feature_cols].groupby("datetime", group_keys=False).apply(lambda df: df.fillna(df.mean()))
    )
    test_df_res.loc[:, feature_cols] = test_feature_fna
    return test_df_res
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ def _create_ts_slices(index, seq_len):
|
||||
assert index.is_lexsorted(), "index should be sorted"
|
||||
|
||||
# number of dates for each code
|
||||
sample_count_by_codes = pd.Series(0, index=index).groupby(level=0).size().values
|
||||
sample_count_by_codes = pd.Series(0, index=index).groupby(level=0, group_keys=False).size().values
|
||||
|
||||
# start_index for each code
|
||||
start_index_of_codes = np.roll(np.cumsum(sample_count_by_codes), 1)
|
||||
|
||||
@@ -25,7 +25,7 @@ class DayLast(ElemOperator):
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
    """Load the wrapped feature and replace each value with the last value of its group.

    Groups are formed by looking the series' index up in the array returned
    by ``get_calendar_day(freq=freq)``.
    NOTE(review): presumably that array maps each calendar position to its
    trading day, making this "last value of the day" -- confirm against
    ``get_calendar_day``.
    """
    _calendar = get_calendar_day(freq=freq)
    series = self.feature.load(instrument, start_index, end_index, freq)
    # transform("last") broadcasts each group's last value back onto every row
    # of that group, so the result has the same index as ``series``.
    return series.groupby(_calendar[series.index], group_keys=False).transform("last")
|
||||
|
||||
|
||||
class FFillNan(ElemOperator):
|
||||
@@ -44,7 +44,7 @@ class FFillNan(ElemOperator):
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
    """Load the wrapped feature and forward-fill its missing values.

    Each NaN is replaced by the most recent preceding non-NaN value; NaNs
    before the first valid value stay NaN.
    """
    series = self.feature.load(instrument, start_index, end_index, freq)
    # ``Series.ffill()`` replaces ``fillna(method="ffill")``, whose ``method``
    # argument is deprecated in recent pandas releases.
    return series.ffill()
|
||||
|
||||
|
||||
class BFillNan(ElemOperator):
|
||||
@@ -63,7 +63,7 @@ class BFillNan(ElemOperator):
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
    """Load the wrapped feature and backward-fill its missing values.

    Each NaN is replaced by the next following non-NaN value; NaNs after the
    last valid value stay NaN.
    """
    series = self.feature.load(instrument, start_index, end_index, freq)
    # ``Series.bfill()`` replaces ``fillna(method="bfill")``, whose ``method``
    # argument is deprecated in recent pandas releases.
    return series.bfill()
|
||||
|
||||
|
||||
class Date(ElemOperator):
|
||||
|
||||
@@ -19,9 +19,9 @@ def generate_order(stock: str, start_idx: int, end_idx: int) -> bool:
|
||||
|
||||
df["date"] = df["datetime"].dt.date.astype("datetime64")
|
||||
df = df.set_index(["instrument", "datetime", "date"])
|
||||
df = df.groupby("date").take(range(start_idx, end_idx)).droplevel(level=0)
|
||||
df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0)
|
||||
|
||||
order_all = pd.DataFrame(df.groupby(level=(2, 0)).mean().dropna())
|
||||
order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna())
|
||||
order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"]
|
||||
order_all = order_all[order_all["amount"] > 0.0]
|
||||
order_all["order_type"] = 0
|
||||
|
||||
Reference in New Issue
Block a user