1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-05 12:00:58 +08:00
Files
qlib/qlib/data/dataset/utils.py

73 lines
1.8 KiB
Python

from typing import Union
import pandas as pd
def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int:
"""
get the level index of `df` given `level`
Parameters
----------
df : pd.DataFrame
data
level : Union[str, int]
index level
Returns
-------
int:
The level index in the multiple index
"""
if isinstance(level, str):
try:
return df.index.names.index(level)
except (AttributeError, ValueError):
# NOTE: If level index is not given in the data, the default level index will be ('datetime', 'instrument')
return ("datetime", "instrument").index(level)
elif isinstance(level, int):
return level
else:
raise NotImplementedError(f"This type of input is not supported")
def fetch_df_by_index(
df: pd.DataFrame,
selector: Union[pd.Timestamp, slice, str, list],
level: Union[str, int],
fetch_orig=True,
) -> pd.DataFrame:
"""
fetch data from `data` with `selector` and `level`
Parameters
----------
selector : Union[pd.Timestamp, slice, str, list]
selector
level : Union[int, str]
the level to use the selector
Returns
-------
Data of the given index.
"""
# level = None -> use selector directly
if level == None:
return df.loc(axis=0)[selector]
# Try to get the right index
idx_slc = (selector, slice(None, None))
if get_level_index(df, level) == 1:
idx_slc = idx_slc[1], idx_slc[0]
if fetch_orig:
for slc in idx_slc:
if slc != slice(None, None):
return df.loc[
pd.IndexSlice[idx_slc],
]
else:
return df
else:
return df.loc[
pd.IndexSlice[idx_slc],
]