Bug fix for Rank and WMA operators (#1228)

* bug fix: 1) 100 should be used to scale down percentileofscore return to 0-1, not length of array; 2) for (linear) weighted MA(n), weight should be n, n-1, ..., 1 instead of n-1, ..., 0 * use native pandas function for rank * remove useless import * require pandas 1.4+ * rank for py37+pandas 1.3.5 compatibility * lint improvement * lint black fix * use hasattr instead of version to check whether rolling.rank is implemented
2026-07-21 19:27:36 +08:00 · 2022-11-13 19:03:23 +08:00
parent ff2154c618
commit 4001a5d157
1 changed files with 9 additions and 15 deletions
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -34,8 +34,6 @@ np.seterr(invalid="ignore")
 #################### Element-Wise Operator ####################
 class ElemOperator(ExpressionOps):
    """Element-wise Operator
@@ -216,9 +214,7 @@ class Not(NpElemOperator):
    Parameters
    ----------
-    feature_left : Expression
+    feature : Expression
        feature instance
    feature_right : Expression
        feature instance
    Returns
@@ -241,8 +237,6 @@ class PairOperator(ExpressionOps):
        feature instance or numeric value
    feature_right : Expression
        feature instance or numeric value
    func : str
        operator function
    Returns
    ----------
@@ -1155,9 +1149,13 @@ class Rank(Rolling):
    def __init__(self, feature, N):
        super(Rank, self).__init__(feature, N, "rank")
    # for compatibility of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank
    def _load_internal(self, instrument, start_index, end_index, *args):
        series = self.feature.load(instrument, start_index, end_index, *args)
-        # TODO: implement in Cython
+
        rolling_or_expending = series.expanding(min_periods=1) if self.N == 0 else series.rolling(self.N, min_periods=1)
        if hasattr(rolling_or_expending, "rank"):
            return rolling_or_expending.rank(pct=True)
        def rank(x):
            if np.isnan(x[-1]):
@@ -1165,13 +1163,9 @@ class Rank(Rolling):
            x1 = x[~np.isnan(x)]
            if x1.shape[0] == 0:
                return np.nan
-            return percentileofscore(x1, x1[-1]) / len(x1)
+            return percentileofscore(x1, x1[-1]) / 100
-        if self.N == 0:
+        return rolling_or_expending.apply(rank, raw=True)
            series = series.expanding(min_periods=1).apply(rank, raw=True)
        else:
            series = series.rolling(self.N, min_periods=1).apply(rank, raw=True)
        return series
 class Count(Rolling):
@@ -1341,7 +1335,7 @@ class WMA(Rolling):
        # TODO: implement in Cython
        def weighted_mean(x):
-            w = np.arange(len(x))
+            w = np.arange(len(x)) + 1
            w = w / w.sum()
            return np.nanmean(w * x)