mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
fix: replace deprecated pandas fillna(method=) with ffill()/bfill() (#1987)
* fix: replace deprecated pandas fillna(method=) with ffill()/bfill() Replace deprecated fillna(method="ffill"/"bfill") calls with modern pandas ffill() and bfill() methods to fix FutureWarnings in pandas 2.x. Also includes black formatting fixes for compliance. This addresses the pandas deprecation warnings portion of issue #1981. Other issues (date parsing, type conversion, timezone handling) will be addressed in separate commits. Fixes: - Yahoo collector: 2 instances in calc_change() and adjusted_price() - BaoStock collector: 1 instance in calc_change() - Core utils: resam.py fillna operations - Backtest: profit_attribution.py stock data processing - High-freq ops: FFillNan and BFillNan operators - Position analysis: parse_position.py weight processing Partially addresses GitHub issue #1981 * lint with black * lint with black * limit minimum version of pandas * limit minimum version of pandas --------- Co-authored-by: Linlang <Lv.Linlang@hotmail.com>
This commit is contained in:
@@ -27,7 +27,10 @@ license = { text = "MIT" }
|
||||
dependencies = [
|
||||
"pyyaml",
|
||||
"numpy",
|
||||
"pandas>=0.24",
|
||||
# Since version 1.1.0, pandas supports the ffill and bfill methods.
|
||||
# Since version 2.1.0, pandas has deprecated the method parameter of the fillna method.
|
||||
# qlib has updated the fillna method in PR 1987 and limited the minimum version of pandas.
|
||||
"pandas>=1.1",
|
||||
# I encountered an error where set_uri does not work when downloading artifacts in mlflow 3.1.1;
|
||||
# But earlier versions of mlflow do not have this problem.
|
||||
# But when I switch to 2.*.* version, another error occurs, which is even more strange...
|
||||
|
||||
@@ -281,13 +281,13 @@ def brinson_pa(
|
||||
|
||||
stock_group_field = stock_df[group_field].unstack().T
|
||||
# FIXME: some attributes of some suspended stocks are NaN.
|
||||
stock_group_field = stock_group_field.fillna(method="ffill")
|
||||
stock_group_field = stock_group_field.ffill()
|
||||
stock_group_field = stock_group_field.loc[start_date:end_date]
|
||||
|
||||
stock_group = get_stock_group(stock_group_field, bench_stock_weight, group_method, group_n)
|
||||
|
||||
deal_price_df = stock_df["deal_price"].unstack().T
|
||||
deal_price_df = deal_price_df.fillna(method="ffill")
|
||||
deal_price_df = deal_price_df.ffill()
|
||||
|
||||
# NOTE:
|
||||
# The return will be slightly different from the return in the report.
|
||||
|
||||
@@ -135,7 +135,7 @@ class FFillNan(ElemOperator):
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
|
||||
series = self.feature.load(instrument, start_index, end_index, freq)
|
||||
return series.fillna(method="ffill")
|
||||
return series.ffill()
|
||||
|
||||
|
||||
class BFillNan(ElemOperator):
|
||||
@@ -154,7 +154,7 @@ class BFillNan(ElemOperator):
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
|
||||
series = self.feature.load(instrument, start_index, end_index, freq)
|
||||
return series.fillna(method="bfill")
|
||||
return series.bfill()
|
||||
|
||||
|
||||
class Date(ElemOperator):
|
||||
|
||||
@@ -33,7 +33,7 @@ def parse_position(position: dict = None) -> pd.DataFrame:
|
||||
|
||||
position_weight_df = get_stock_weight_df(position)
|
||||
# If the day does not exist, use the last weight
|
||||
position_weight_df.fillna(method="ffill", inplace=True)
|
||||
position_weight_df.ffill(inplace=True)
|
||||
|
||||
previous_data = {"date": None, "code_list": []}
|
||||
|
||||
|
||||
@@ -67,7 +67,6 @@ class NaiveDFStorage(BaseHandlerStorage):
|
||||
col_set: Union[str, List[str]] = DataHandler.CS_ALL,
|
||||
fetch_orig: bool = True,
|
||||
) -> pd.DataFrame:
|
||||
|
||||
# Following conflicts may occur
|
||||
# - Does ["20200101", "20210101"] mean selecting this slice or these two days?
|
||||
# To solve this issue
|
||||
|
||||
@@ -161,7 +161,6 @@ def init_instance_by_config(
|
||||
# path like 'file:///<path to pickle file>/obj.pkl'
|
||||
pr = urlparse(config)
|
||||
if pr.scheme == "file":
|
||||
|
||||
# To enable relative path like file://data/a/b/c.pkl. pr.netloc will be data
|
||||
path = pr.path
|
||||
if pr.netloc != "":
|
||||
|
||||
@@ -222,7 +222,7 @@ def get_valid_value(series, last=True):
|
||||
Nan | float
|
||||
the first/last valid value
|
||||
"""
|
||||
return series.fillna(method="ffill").iloc[-1] if last else series.fillna(method="bfill").iloc[0]
|
||||
return series.ffill().iloc[-1] if last else series.bfill().iloc[0]
|
||||
|
||||
|
||||
def _ts_data_valid(ts_feature, last=False):
|
||||
|
||||
@@ -172,7 +172,7 @@ class BaostockNormalizeHS3005min(BaseNormalize):
|
||||
@staticmethod
|
||||
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
|
||||
df = df.copy()
|
||||
_tmp_series = df["close"].fillna(method="ffill")
|
||||
_tmp_series = df["close"].ffill()
|
||||
_tmp_shift_series = _tmp_series.shift(1)
|
||||
if last_close is not None:
|
||||
_tmp_shift_series.iloc[0] = float(last_close)
|
||||
|
||||
@@ -371,7 +371,7 @@ class YahooNormalize(BaseNormalize):
|
||||
@staticmethod
|
||||
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
|
||||
df = df.copy()
|
||||
_tmp_series = df["close"].fillna(method="ffill")
|
||||
_tmp_series = df["close"].ffill()
|
||||
_tmp_shift_series = _tmp_series.shift(1)
|
||||
if last_close is not None:
|
||||
_tmp_shift_series.iloc[0] = float(last_close)
|
||||
@@ -459,7 +459,7 @@ class YahooNormalize1d(YahooNormalize, ABC):
|
||||
df.set_index(self._date_field_name, inplace=True)
|
||||
if "adjclose" in df:
|
||||
df["factor"] = df["adjclose"] / df["close"]
|
||||
df["factor"] = df["factor"].fillna(method="ffill")
|
||||
df["factor"] = df["factor"].ffill()
|
||||
else:
|
||||
df["factor"] = 1
|
||||
for _col in self.COLUMNS:
|
||||
|
||||
Reference in New Issue
Block a user