From 72b5d9abfa091af88b0dbac5e9bd075969c6386d Mon Sep 17 00:00:00 2001 From: Dong Zhou Date: Fri, 30 Oct 2020 11:02:32 +0800 Subject: [PATCH] fix ops & EMA support alpha --- qlib/data/_libs/expanding.pyx | 12 ++++++------ qlib/data/ops.py | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/qlib/data/_libs/expanding.pyx b/qlib/data/_libs/expanding.pyx index 76b824c94..47bc49610 100644 --- a/qlib/data/_libs/expanding.pyx +++ b/qlib/data/_libs/expanding.pyx @@ -14,7 +14,7 @@ cdef class Expanding(object): cdef int na_count def __init__(self): self.na_count = 0 - + cdef double update(self, double val): pass @@ -25,7 +25,7 @@ cdef class Mean(Expanding): def __init__(self): super(Mean, self).__init__() self.vsum = 0 - + cdef double update(self, double val): self.barv.push_back(val) if isnan(val): @@ -62,7 +62,7 @@ cdef class Slope(Expanding): return (N*self.xy_sum - self.x_sum*self.y_sum) / \ (N*self.x2_sum - self.x_sum*self.x_sum) - + cdef class Resi(Expanding): """1-D array expanding residuals""" cdef double x_sum @@ -94,7 +94,7 @@ cdef class Resi(Expanding): interp = y_mean - slope*x_mean return val - (slope*size + interp) - + cdef class Rsquare(Expanding): """1-D array expanding rsquare""" cdef double x_sum @@ -117,7 +117,7 @@ cdef class Rsquare(Expanding): self.na_count += 1 else: self.x_sum += size - self.x2_sum += size + self.x2_sum += size * size self.y_sum += val self.y2_sum += val * val self.xy_sum += size * val @@ -126,7 +126,7 @@ cdef class Rsquare(Expanding): sqrt((N*self.x2_sum - self.x_sum*self.x_sum) * (N*self.y2_sum - self.y_sum*self.y_sum)) return rvalue * rvalue - + cdef np.ndarray[double, ndim=1] expanding(Expanding r, np.ndarray a): cdef int i cdef int N = len(a) diff --git a/qlib/data/ops.py b/qlib/data/ops.py index 9f66a88af..d9c657595 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -8,6 +8,8 @@ from __future__ import print_function import numpy as np import pandas as pd +from scipy.stats import percentileofscore + from .base import Expression, ExpressionOps from ..log import get_module_logger @@ -687,6 +689,8 @@ class Rolling(ExpressionOps): # isnull = series.isnull() # NOTE: isnull = NaN, inf is not null if self.N == 0: series = getattr(series.expanding(min_periods=1), self.func)() + elif 0 < self.N < 1: + series = series.ewm(alpha=self.N, min_periods=1).mean() else: series = getattr(series.rolling(self.N, min_periods=1), self.func)() # series.iloc[:self.N-1] = np.nan @@ -696,6 +700,8 @@ class Rolling(ExpressionOps): def get_longest_back_rolling(self): if self.N == 0: return np.inf + if 0 < self.N < 1: + return int(np.log(1e-6) / np.log(1 - self.N)) # (1 - N)**window == 1e-6 return self.feature.get_longest_back_rolling() + self.N - 1 def get_extended_window_size(self): @@ -704,6 +710,11 @@ class Rolling(ExpressionOps): # remove such support for N == 0? get_module_logger(self.__class__.__name__).warning("The Rolling(ATTR, 0) will not be accurately calculated") return self.feature.get_extended_window_size() + elif 0 < self.N < 1: + lft_etd, rght_etd = self.feature.get_extended_window_size() + size = int(np.log(1e-6) / np.log(1 - self.N)) + lft_etd = max(lft_etd + size - 1, lft_etd) + return lft_etd, rght_etd else: lft_etd, rght_etd = self.feature.get_extended_window_size() lft_etd = max(lft_etd + self.N - 1, lft_etd) @@ -1087,7 +1098,7 @@ class Rank(Rolling): x1 = x[~np.isnan(x)] if x1.shape[0] == 0: return np.nan - return (x1.argsort()[-1] + 1) / len(x1) + return percentileofscore(x1, x1[-1]) / len(x1) if self.N == 0: series = series.expanding(min_periods=1).apply(rank, raw=True) @@ -1273,7 +1284,7 @@ class EMA(Rolling): ---------- feature : Expression feature instance - N : int + N : int, float rolling window size Returns @@ -1296,6 +1307,8 @@ class EMA(Rolling): if self.N == 0: series = series.expanding(min_periods=1).apply(exp_weighted_mean, raw=True) + elif 0 < self.N < 1: + series = series.ewm(alpha=self.N, min_periods=1).mean() else: series = series.ewm(span=self.N, min_periods=1).mean() return series