From 4001a5d1571cb622315dd6c7ff6034ad42d6cd17 Mon Sep 17 00:00:00 2001 From: qianyun210603 Date: Sun, 13 Nov 2022 19:03:23 +0800 Subject: [PATCH] Bug fix for Rank and WMA operators (#1228) * bug fix: 1) 100 should be used to scale down percentileofscore return to 0-1, not length of array; 2) for (linear) weighted MA(n), weight should be n, n-1, ..., 1 instead of n-1, ..., 0 * use native pandas function for rank * remove useless import * require pandas 1.4+ * rank for py37+pandas 1.3.5 compatibility * lint improvement * lint black fix * use hasattr instead of version to check whether rolling.rank is implemented --- qlib/data/ops.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 1cbb1d2e6..fe2ebc9f6 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -34,8 +34,6 @@ np.seterr(invalid="ignore") #################### Element-Wise Operator #################### - - class ElemOperator(ExpressionOps): """Element-wise Operator @@ -216,9 +214,7 @@ class Not(NpElemOperator): Parameters ---------- - feature_left : Expression - feature instance - feature_right : Expression + feature : Expression feature instance Returns @@ -241,8 +237,6 @@ class PairOperator(ExpressionOps): feature instance or numeric value feature_right : Expression feature instance or numeric value - func : str - operator function Returns ---------- @@ -1155,9 +1149,13 @@ class Rank(Rolling): def __init__(self, feature, N): super(Rank, self).__init__(feature, N, "rank") + # for compatiblity of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank def _load_internal(self, instrument, start_index, end_index, *args): series = self.feature.load(instrument, start_index, end_index, *args) - # TODO: implement in Cython + + rolling_or_expending = series.expanding(min_periods=1) if self.N == 0 else series.rolling(self.N, min_periods=1) + if hasattr(rolling_or_expending, "rank"): + return 
rolling_or_expending.rank(pct=True) def rank(x): if np.isnan(x[-1]): @@ -1165,13 +1163,9 @@ class Rank(Rolling): x1 = x[~np.isnan(x)] if x1.shape[0] == 0: return np.nan - return percentileofscore(x1, x1[-1]) / len(x1) + return percentileofscore(x1, x1[-1]) / 100 - if self.N == 0: - series = series.expanding(min_periods=1).apply(rank, raw=True) - else: - series = series.rolling(self.N, min_periods=1).apply(rank, raw=True) - return series + return rolling_or_expending.apply(rank, raw=True) class Count(Rolling): @@ -1341,7 +1335,7 @@ class WMA(Rolling): # TODO: implement in Cython def weighted_mean(x): - w = np.arange(len(x)) + w = np.arange(len(x)) + 1 w = w / w.sum() return np.nanmean(w * x)