1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix: replace deprecated pandas fillna(method=) with ffill()/bfill() (#1987)

* fix: replace deprecated pandas fillna(method=) with ffill()/bfill()

  Replace deprecated fillna(method="ffill"/"bfill") calls with modern
  pandas ffill() and bfill() methods to fix FutureWarnings in pandas 2.x.

  Also includes black formatting fixes for compliance.

  This addresses the pandas deprecation warnings portion of issue #1981.
  Other issues (date parsing, type conversion, timezone handling) will be
  addressed in separate commits.

  Fixes:
  - Yahoo collector: 2 instances in calc_change() and adjusted_price()
  - BaoStock collector: 1 instance in calc_change()
  - Core utils: resam.py fillna operations
  - Backtest: profit_attribution.py stock data processing
  - High-freq ops: FFillNan and BFillNan operators
  - Position analysis: parse_position.py weight processing

  Partially addresses GitHub issue #1981

* lint with black

* lint with black

* limit minimum version of pandas

* limit minimum version of pandas

---------

Co-authored-by: Linlang <Lv.Linlang@hotmail.com>
This commit is contained in:
Alaa Kaddour
2025-08-19 09:00:29 +01:00
committed by GitHub
parent 2d05a705e3
commit 7095e755fa
9 changed files with 13 additions and 12 deletions

View File

@@ -27,7 +27,10 @@ license = { text = "MIT" }
dependencies = [
"pyyaml",
"numpy",
"pandas>=0.24",
# Since version 1.1.0, pandas supports the ffill and bfill methods.
# Since version 2.1.0, pandas has deprecated the method parameter of the fillna method.
# qlib has updated the fillna method in PR 1987 and limited the minimum version of pandas.
"pandas>=1.1",
# I encoutered an Error that the set_uri does not work when downloading artifacts in mlflow 3.1.1;
# But earlier versions of mlflow does not have this problem.
# But when I switch to 2.*.* version, another error occurs, which is even more strange...

View File

@@ -281,13 +281,13 @@ def brinson_pa(
stock_group_field = stock_df[group_field].unstack().T
# FIXME: some attributes of some suspend stock is NAN.
stock_group_field = stock_group_field.fillna(method="ffill")
stock_group_field = stock_group_field.ffill()
stock_group_field = stock_group_field.loc[start_date:end_date]
stock_group = get_stock_group(stock_group_field, bench_stock_weight, group_method, group_n)
deal_price_df = stock_df["deal_price"].unstack().T
deal_price_df = deal_price_df.fillna(method="ffill")
deal_price_df = deal_price_df.ffill()
# NOTE:
# The return will be slightly different from the of the return in the report.

View File

@@ -135,7 +135,7 @@ class FFillNan(ElemOperator):
def _load_internal(self, instrument, start_index, end_index, freq):
series = self.feature.load(instrument, start_index, end_index, freq)
return series.fillna(method="ffill")
return series.ffill()
class BFillNan(ElemOperator):
@@ -154,7 +154,7 @@ class BFillNan(ElemOperator):
def _load_internal(self, instrument, start_index, end_index, freq):
series = self.feature.load(instrument, start_index, end_index, freq)
return series.fillna(method="bfill")
return series.bfill()
class Date(ElemOperator):

View File

@@ -33,7 +33,7 @@ def parse_position(position: dict = None) -> pd.DataFrame:
position_weight_df = get_stock_weight_df(position)
# If the day does not exist, use the last weight
position_weight_df.fillna(method="ffill", inplace=True)
position_weight_df.ffill(inplace=True)
previous_data = {"date": None, "code_list": []}

View File

@@ -67,7 +67,6 @@ class NaiveDFStorage(BaseHandlerStorage):
col_set: Union[str, List[str]] = DataHandler.CS_ALL,
fetch_orig: bool = True,
) -> pd.DataFrame:
# Following conflicts may occur
# - Does [20200101", "20210101"] mean selecting this slice or these two days?
# To solve this issue

View File

@@ -161,7 +161,6 @@ def init_instance_by_config(
# path like 'file:///<path to pickle file>/obj.pkl'
pr = urlparse(config)
if pr.scheme == "file":
# To enable relative path like file://data/a/b/c.pkl. pr.netloc will be data
path = pr.path
if pr.netloc != "":

View File

@@ -222,7 +222,7 @@ def get_valid_value(series, last=True):
Nan | float
the first/last valid value
"""
return series.fillna(method="ffill").iloc[-1] if last else series.fillna(method="bfill").iloc[0]
return series.ffill().iloc[-1] if last else series.bfill().iloc[0]
def _ts_data_valid(ts_feature, last=False):

View File

@@ -172,7 +172,7 @@ class BaostockNormalizeHS3005min(BaseNormalize):
@staticmethod
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
df = df.copy()
_tmp_series = df["close"].fillna(method="ffill")
_tmp_series = df["close"].ffill()
_tmp_shift_series = _tmp_series.shift(1)
if last_close is not None:
_tmp_shift_series.iloc[0] = float(last_close)

View File

@@ -371,7 +371,7 @@ class YahooNormalize(BaseNormalize):
@staticmethod
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
df = df.copy()
_tmp_series = df["close"].fillna(method="ffill")
_tmp_series = df["close"].ffill()
_tmp_shift_series = _tmp_series.shift(1)
if last_close is not None:
_tmp_shift_series.iloc[0] = float(last_close)
@@ -459,7 +459,7 @@ class YahooNormalize1d(YahooNormalize, ABC):
df.set_index(self._date_field_name, inplace=True)
if "adjclose" in df:
df["factor"] = df["adjclose"] / df["close"]
df["factor"] = df["factor"].fillna(method="ffill")
df["factor"] = df["factor"].ffill()
else:
df["factor"] = 1
for _col in self.COLUMNS: