fix ops & EMA support alpha

2026-07-04 11:30:57 +08:00 · 2020-10-30 11:02:32 +08:00
parent da9d1c8ac6
commit 72b5d9abfa
2 changed files with 21 additions and 8 deletions
--- a/qlib/data/_libs/expanding.pyx
+++ b/qlib/data/_libs/expanding.pyx
@@ -14,7 +14,7 @@ cdef class Expanding(object):
    cdef int na_count
    def __init__(self):
        self.na_count = 0
-        
+
    cdef double update(self, double val):
        pass

@@ -25,7 +25,7 @@ cdef class Mean(Expanding):
    def __init__(self):
        super(Mean, self).__init__()
        self.vsum = 0
-        
+
    cdef double update(self, double val):
        self.barv.push_back(val)
        if isnan(val):
@@ -62,7 +62,7 @@ cdef class Slope(Expanding):
        return (N*self.xy_sum - self.x_sum*self.y_sum) / \
            (N*self.x2_sum - self.x_sum*self.x_sum)

-        
+
 cdef class Resi(Expanding):
    """1-D array expanding residuals"""
    cdef double x_sum
@@ -94,7 +94,7 @@ cdef class Resi(Expanding):
        interp = y_mean - slope*x_mean
        return val - (slope*size + interp)

-    
+
 cdef class Rsquare(Expanding):
    """1-D array expanding rsquare"""
    cdef double x_sum
@@ -117,7 +117,7 @@ cdef class Rsquare(Expanding):
            self.na_count += 1
        else:
            self.x_sum  += size
-            self.x2_sum += size
+            self.x2_sum += size * size
            self.y_sum  += val
            self.y2_sum += val * val
            self.xy_sum += size * val
@@ -126,7 +126,7 @@ cdef class Rsquare(Expanding):
            sqrt((N*self.x2_sum - self.x_sum*self.x_sum) * (N*self.y2_sum - self.y_sum*self.y_sum))
        return rvalue * rvalue

-    
+
 cdef np.ndarray[double, ndim=1] expanding(Expanding r, np.ndarray a):
    cdef int  i
    cdef int  N = len(a)
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -8,6 +8,8 @@ from __future__ import print_function
 import numpy as np
 import pandas as pd

+from scipy.stats import percentileofscore
+
 from .base import Expression, ExpressionOps
 from ..log import get_module_logger

@@ -687,6 +689,8 @@ class Rolling(ExpressionOps):
        # isnull = series.isnull() # NOTE: isnull = NaN, inf is not null
        if self.N == 0:
            series = getattr(series.expanding(min_periods=1), self.func)()
+        elif 0 < self.N < 1:
+            series = series.ewm(alpha=self.N, min_periods=1).mean()
        else:
            series = getattr(series.rolling(self.N, min_periods=1), self.func)()
            # series.iloc[:self.N-1] = np.nan
@@ -696,6 +700,8 @@ class Rolling(ExpressionOps):
    def get_longest_back_rolling(self):
        if self.N == 0:
            return np.inf
+        if 0 < self.N < 1:
+            return int(np.log(1e-6) / np.log(1 - self.N)) # N is the EWM alpha here: window solves (1 - N)**window == 1e-6; NOTE(review): unlike the return below, the feature's own look-back is not added - confirm this is intended
        return self.feature.get_longest_back_rolling() + self.N - 1

    def get_extended_window_size(self):
@@ -704,6 +710,11 @@ class Rolling(ExpressionOps):
            # remove such support for N == 0?
            get_module_logger(self.__class__.__name__).warning("The Rolling(ATTR, 0) will not be accurately calculated")
            return self.feature.get_extended_window_size()
+        elif 0 < self.N < 1:
+            lft_etd, rght_etd = self.feature.get_extended_window_size()
+            size = int(np.log(1e-6) / np.log(1 - self.N))
+            lft_etd = max(lft_etd + size - 1, lft_etd)
+            return lft_etd, rght_etd
        else:
            lft_etd, rght_etd = self.feature.get_extended_window_size()
            lft_etd = max(lft_etd + self.N - 1, lft_etd)
@@ -1087,7 +1098,7 @@ class Rank(Rolling):
            x1 = x[~np.isnan(x)]
            if x1.shape[0] == 0:
                return np.nan
-            return (x1.argsort()[-1] + 1) / len(x1)
+            return percentileofscore(x1, x1[-1]) / len(x1)

        if self.N == 0:
            series = series.expanding(min_periods=1).apply(rank, raw=True)
@@ -1273,7 +1284,7 @@ class EMA(Rolling):
    ----------
    feature : Expression
        feature instance
-    N : int
+    N : int, float
        rolling window size

    Returns
@@ -1296,6 +1307,8 @@ class EMA(Rolling):

        if self.N == 0:
            series = series.expanding(min_periods=1).apply(exp_weighted_mean, raw=True)
+        elif 0 < self.N < 1:
+            series = series.ewm(alpha=self.N, min_periods=1).mean()
        else:
            series = series.ewm(span=self.N, min_periods=1).mean()
        return series