1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-04 11:30:57 +08:00

fix ops & EMA support alpha

This commit is contained in:
Dong Zhou
2020-10-30 11:02:32 +08:00
committed by you-n-g
parent da9d1c8ac6
commit 72b5d9abfa
2 changed files with 21 additions and 8 deletions

View File

@@ -14,7 +14,7 @@ cdef class Expanding(object):
cdef int na_count
def __init__(self):
self.na_count = 0
cdef double update(self, double val):
pass
@@ -25,7 +25,7 @@ cdef class Mean(Expanding):
def __init__(self):
super(Mean, self).__init__()
self.vsum = 0
cdef double update(self, double val):
self.barv.push_back(val)
if isnan(val):
@@ -62,7 +62,7 @@ cdef class Slope(Expanding):
return (N*self.xy_sum - self.x_sum*self.y_sum) / \
(N*self.x2_sum - self.x_sum*self.x_sum)
cdef class Resi(Expanding):
"""1-D array expanding residuals"""
cdef double x_sum
@@ -94,7 +94,7 @@ cdef class Resi(Expanding):
interp = y_mean - slope*x_mean
return val - (slope*size + interp)
cdef class Rsquare(Expanding):
"""1-D array expanding rsquare"""
cdef double x_sum
@@ -117,7 +117,7 @@ cdef class Rsquare(Expanding):
self.na_count += 1
else:
self.x_sum += size
self.x2_sum += size
self.x2_sum += size * size
self.y_sum += val
self.y2_sum += val * val
self.xy_sum += size * val
@@ -126,7 +126,7 @@ cdef class Rsquare(Expanding):
sqrt((N*self.x2_sum - self.x_sum*self.x_sum) * (N*self.y2_sum - self.y_sum*self.y_sum))
return rvalue * rvalue
cdef np.ndarray[double, ndim=1] expanding(Expanding r, np.ndarray a):
cdef int i
cdef int N = len(a)

View File

@@ -8,6 +8,8 @@ from __future__ import print_function
import numpy as np
import pandas as pd
from scipy.stats import percentileofscore
from .base import Expression, ExpressionOps
from ..log import get_module_logger
@@ -687,6 +689,8 @@ class Rolling(ExpressionOps):
# isnull = series.isnull() # NOTE: isnull = NaN, inf is not null
if self.N == 0:
series = getattr(series.expanding(min_periods=1), self.func)()
elif 0 < self.N < 1:
series = series.ewm(alpha=self.N, min_periods=1).mean()
else:
series = getattr(series.rolling(self.N, min_periods=1), self.func)()
# series.iloc[:self.N-1] = np.nan
@@ -696,6 +700,8 @@ class Rolling(ExpressionOps):
def get_longest_back_rolling(self):
    """Return how many past periods this rolling operator may look back.

    ``self.N`` selects the mode:

    * ``N == 0``: expanding window, so the look-back is unbounded and
      ``np.inf`` is returned.
    * ``0 < N < 1``: ``N`` is used as an exponential-weighting ``alpha``.
      The effective window is the length at which the weight
      ``(1 - N) ** window`` has decayed to ``1e-6``.
    * otherwise: ``N`` is a fixed window length.

    In both bounded modes the wrapped feature's own look-back is added, so
    nested rolling expressions accumulate their windows. A window of length
    ``w`` includes the current period, hence the ``- 1``.

    Returns
    -------
    int or float
        Number of past periods required (``np.inf`` for the expanding case).
    """
    if self.N == 0:
        return np.inf
    if 0 < self.N < 1:
        # Solve (1 - N) ** window == 1e-6 for the effective window length.
        # Clamp to 1 so an alpha very close to 1 (where the ratio truncates
        # to 0) can never produce a negative look-back.
        window = max(int(np.log(1e-6) / np.log(1 - self.N)), 1)
    else:
        window = self.N
    return self.feature.get_longest_back_rolling() + window - 1
def get_extended_window_size(self):
@@ -704,6 +710,11 @@ class Rolling(ExpressionOps):
# remove such support for N == 0?
get_module_logger(self.__class__.__name__).warning("The Rolling(ATTR, 0) will not be accurately calculated")
return self.feature.get_extended_window_size()
elif 0 < self.N < 1:
lft_etd, rght_etd = self.feature.get_extended_window_size()
size = int(np.log(1e-6) / np.log(1 - self.N))
lft_etd = max(lft_etd + size - 1, lft_etd)
return lft_etd, rght_etd
else:
lft_etd, rght_etd = self.feature.get_extended_window_size()
lft_etd = max(lft_etd + self.N - 1, lft_etd)
@@ -1087,7 +1098,7 @@ class Rank(Rolling):
x1 = x[~np.isnan(x)]
if x1.shape[0] == 0:
return np.nan
return (x1.argsort()[-1] + 1) / len(x1)
return percentileofscore(x1, x1[-1]) / len(x1)
if self.N == 0:
series = series.expanding(min_periods=1).apply(rank, raw=True)
@@ -1273,7 +1284,7 @@ class EMA(Rolling):
----------
feature : Expression
feature instance
N : int
N : int, float
rolling window size
Returns
@@ -1296,6 +1307,8 @@ class EMA(Rolling):
if self.N == 0:
series = series.expanding(min_periods=1).apply(exp_weighted_mean, raw=True)
elif 0 < self.N < 1:
series = series.ewm(alpha=self.N, min_periods=1).mean()
else:
series = series.ewm(span=self.N, min_periods=1).mean()
return series