mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 11:00:57 +08:00
init commit
This commit is contained in:
36
qlib/data/__init__.py
Normal file
36
qlib/data/__init__.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from .data import (
|
||||
D,
|
||||
CalendarProvider,
|
||||
InstrumentProvider,
|
||||
FeatureProvider,
|
||||
ExpressionProvider,
|
||||
DatasetProvider,
|
||||
LocalCalendarProvider,
|
||||
LocalInstrumentProvider,
|
||||
LocalFeatureProvider,
|
||||
LocalExpressionProvider,
|
||||
LocalDatasetProvider,
|
||||
ClientCalendarProvider,
|
||||
ClientInstrumentProvider,
|
||||
ClientDatasetProvider,
|
||||
BaseProvider,
|
||||
LocalProvider,
|
||||
ClientProvider,
|
||||
)
|
||||
|
||||
from .cache import (
|
||||
ExpressionCache,
|
||||
DatasetCache,
|
||||
ServerExpressionCache,
|
||||
ServerDatasetCache,
|
||||
SimpleDatasetCache,
|
||||
ClientDatasetCache,
|
||||
ClientCalendarCache,
|
||||
)
|
||||
2
qlib/data/_libs/__init__.py
Normal file
2
qlib/data/_libs/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
152
qlib/data/_libs/expanding.pyx
Normal file
152
qlib/data/_libs/expanding.pyx
Normal file
@@ -0,0 +1,152 @@
|
||||
# cython: profile=False
|
||||
# cython: boundscheck=False, wraparound=False, cdivision=True
|
||||
cimport cython
|
||||
cimport numpy as np
|
||||
import numpy as np
|
||||
|
||||
from libc.math cimport sqrt, isnan, NAN
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
|
||||
cdef class Expanding(object):
|
||||
"""1-D array expanding"""
|
||||
cdef vector[double] barv
|
||||
cdef int na_count
|
||||
def __init__(self):
|
||||
self.na_count = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
pass
|
||||
|
||||
|
||||
cdef class Mean(Expanding):
|
||||
"""1-D array expanding mean"""
|
||||
cdef double vsum
|
||||
def __init__(self):
|
||||
super(Mean, self).__init__()
|
||||
self.vsum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
else:
|
||||
self.vsum += val
|
||||
return self.vsum / (self.barv.size() - self.na_count)
|
||||
|
||||
|
||||
cdef class Slope(Expanding):
|
||||
"""1-D array expanding slope"""
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self):
|
||||
super(Slope, self).__init__()
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
cdef size_t size = self.barv.size()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
else:
|
||||
self.x_sum += size
|
||||
self.x2_sum += size * size
|
||||
self.y_sum += val
|
||||
self.xy_sum += size * val
|
||||
cdef int N = size - self.na_count
|
||||
return (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
(N*self.x2_sum - self.x_sum*self.x_sum)
|
||||
|
||||
|
||||
cdef class Resi(Expanding):
|
||||
"""1-D array expanding residuals"""
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self):
|
||||
super(Resi, self).__init__()
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
cdef size_t size = self.barv.size()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
else:
|
||||
self.x_sum += size
|
||||
self.x2_sum += size * size
|
||||
self.y_sum += val
|
||||
self.xy_sum += size * val
|
||||
cdef int N = size - self.na_count
|
||||
slope = (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
(N*self.x2_sum - self.x_sum*self.x_sum)
|
||||
x_mean = self.x_sum / N
|
||||
y_mean = self.y_sum / N
|
||||
interp = y_mean - slope*x_mean
|
||||
return val - (slope*size + interp)
|
||||
|
||||
|
||||
cdef class Rsquare(Expanding):
|
||||
"""1-D array expanding rsquare"""
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double y2_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self):
|
||||
super(Rsquare, self).__init__()
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.y2_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
cdef size_t size = self.barv.size()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
else:
|
||||
self.x_sum += size
|
||||
self.x2_sum += size
|
||||
self.y_sum += val
|
||||
self.y2_sum += val * val
|
||||
self.xy_sum += size * val
|
||||
cdef int N = size - self.na_count
|
||||
cdef double rvalue = (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
sqrt((N*self.x2_sum - self.x_sum*self.x_sum) * (N*self.y2_sum - self.y_sum*self.y_sum))
|
||||
return rvalue * rvalue
|
||||
|
||||
|
||||
cdef np.ndarray[double, ndim=1] expanding(Expanding r, np.ndarray a):
|
||||
cdef int i
|
||||
cdef int N = len(a)
|
||||
cdef np.ndarray[double, ndim=1] ret = np.empty(N)
|
||||
for i in range(N):
|
||||
ret[i] = r.update(a[i])
|
||||
return ret
|
||||
|
||||
def expanding_mean(np.ndarray a):
|
||||
cdef Mean r = Mean()
|
||||
return expanding(r, a)
|
||||
|
||||
def expanding_slope(np.ndarray a):
|
||||
cdef Slope r = Slope()
|
||||
return expanding(r, a)
|
||||
|
||||
def expanding_rsquare(np.ndarray a):
|
||||
cdef Rsquare r = Rsquare()
|
||||
return expanding(r, a)
|
||||
|
||||
def expanding_resi(np.ndarray a):
|
||||
cdef Resi r = Resi()
|
||||
return expanding(r, a)
|
||||
207
qlib/data/_libs/rolling.pyx
Normal file
207
qlib/data/_libs/rolling.pyx
Normal file
@@ -0,0 +1,207 @@
|
||||
# cython: profile=False
|
||||
# cython: boundscheck=False, wraparound=False, cdivision=True
|
||||
cimport cython
|
||||
cimport numpy as np
|
||||
import numpy as np
|
||||
|
||||
from libc.math cimport sqrt, isnan, NAN
|
||||
from libcpp.deque cimport deque
|
||||
|
||||
|
||||
cdef class Rolling(object):
|
||||
"""1-D array rolling"""
|
||||
cdef int window
|
||||
cdef deque[double] barv
|
||||
cdef int na_count
|
||||
def __init__(self, int window):
|
||||
self.window = window
|
||||
self.na_count = window
|
||||
cdef int i
|
||||
for i in range(window):
|
||||
self.barv.push_back(NAN)
|
||||
|
||||
cdef double update(self, double val):
|
||||
pass
|
||||
|
||||
|
||||
cdef class Mean(Rolling):
|
||||
"""1-D array rolling mean"""
|
||||
cdef double vsum
|
||||
def __init__(self, int window):
|
||||
super(Mean, self).__init__(window)
|
||||
self.vsum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
if not isnan(self.barv.front()):
|
||||
self.vsum -= self.barv.front()
|
||||
else:
|
||||
self.na_count -= 1
|
||||
self.barv.pop_front()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
# return NAN
|
||||
else:
|
||||
self.vsum += val
|
||||
return self.vsum / (self.window - self.na_count)
|
||||
|
||||
|
||||
cdef class Slope(Rolling):
|
||||
"""1-D array rolling slope"""
|
||||
cdef double i_sum # can be used as i2_sum
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self, int window):
|
||||
super(Slope, self).__init__(window)
|
||||
self.i_sum = 0
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
self.xy_sum = self.xy_sum - self.y_sum
|
||||
self.x2_sum = self.x2_sum + self.i_sum - 2*self.x_sum
|
||||
self.x_sum = self.x_sum - self.i_sum
|
||||
cdef double _val
|
||||
_val = self.barv.front()
|
||||
if not isnan(_val):
|
||||
self.i_sum -= 1
|
||||
self.y_sum -= _val
|
||||
else:
|
||||
self.na_count -= 1
|
||||
self.barv.pop_front()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
# return NAN
|
||||
else:
|
||||
self.i_sum += 1
|
||||
self.x_sum += self.window
|
||||
self.x2_sum += self.window * self.window
|
||||
self.y_sum += val
|
||||
self.xy_sum += self.window * val
|
||||
cdef int N = self.window - self.na_count
|
||||
return (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
(N*self.x2_sum - self.x_sum*self.x_sum)
|
||||
|
||||
|
||||
cdef class Resi(Rolling):
|
||||
"""1-D array rolling residuals"""
|
||||
cdef double i_sum # can be used as i2_sum
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self, int window):
|
||||
super(Resi, self).__init__(window)
|
||||
self.i_sum = 0
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
self.xy_sum = self.xy_sum - self.y_sum
|
||||
self.x2_sum = self.x2_sum + self.i_sum - 2*self.x_sum
|
||||
self.x_sum = self.x_sum - self.i_sum
|
||||
cdef double _val
|
||||
_val = self.barv.front()
|
||||
if not isnan(_val):
|
||||
self.i_sum -= 1
|
||||
self.y_sum -= _val
|
||||
else:
|
||||
self.na_count -= 1
|
||||
self.barv.pop_front()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
# return NAN
|
||||
else:
|
||||
self.i_sum += 1
|
||||
self.x_sum += self.window
|
||||
self.x2_sum += self.window * self.window
|
||||
self.y_sum += val
|
||||
self.xy_sum += self.window * val
|
||||
cdef int N = self.window - self.na_count
|
||||
slope = (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
(N*self.x2_sum - self.x_sum*self.x_sum)
|
||||
x_mean = self.x_sum / N
|
||||
y_mean = self.y_sum / N
|
||||
interp = y_mean - slope*x_mean
|
||||
return val - (slope*self.window + interp)
|
||||
|
||||
|
||||
cdef class Rsquare(Rolling):
|
||||
"""1-D array rolling rsquare"""
|
||||
cdef double i_sum
|
||||
cdef double x_sum
|
||||
cdef double x2_sum
|
||||
cdef double y_sum
|
||||
cdef double y2_sum
|
||||
cdef double xy_sum
|
||||
def __init__(self, int window):
|
||||
super(Rsquare, self).__init__(window)
|
||||
self.i_sum = 0
|
||||
self.x_sum = 0
|
||||
self.x2_sum = 0
|
||||
self.y_sum = 0
|
||||
self.y2_sum = 0
|
||||
self.xy_sum = 0
|
||||
|
||||
cdef double update(self, double val):
|
||||
self.barv.push_back(val)
|
||||
self.xy_sum = self.xy_sum - self.y_sum
|
||||
self.x2_sum = self.x2_sum + self.i_sum - 2*self.x_sum
|
||||
self.x_sum = self.x_sum - self.i_sum
|
||||
cdef double _val
|
||||
_val = self.barv.front()
|
||||
if not isnan(_val):
|
||||
self.i_sum -= 1
|
||||
self.y_sum -= _val
|
||||
self.y2_sum -= _val * _val
|
||||
else:
|
||||
self.na_count -= 1
|
||||
self.barv.pop_front()
|
||||
if isnan(val):
|
||||
self.na_count += 1
|
||||
# return NAN
|
||||
else:
|
||||
self.i_sum += 1
|
||||
self.x_sum += self.window
|
||||
self.x2_sum += self.window * self.window
|
||||
self.y_sum += val
|
||||
self.y2_sum += val * val
|
||||
self.xy_sum += self.window * val
|
||||
cdef int N = self.window - self.na_count
|
||||
cdef double rvalue
|
||||
rvalue = (N*self.xy_sum - self.x_sum*self.y_sum) / \
|
||||
sqrt((N*self.x2_sum - self.x_sum*self.x_sum) * (N*self.y2_sum - self.y_sum*self.y_sum))
|
||||
return rvalue * rvalue
|
||||
|
||||
|
||||
cdef np.ndarray[double, ndim=1] rolling(Rolling r, np.ndarray a):
|
||||
cdef int i
|
||||
cdef int N = len(a)
|
||||
cdef np.ndarray[double, ndim=1] ret = np.empty(N)
|
||||
for i in range(N):
|
||||
ret[i] = r.update(a[i])
|
||||
return ret
|
||||
|
||||
def rolling_mean(np.ndarray a, int window):
|
||||
cdef Mean r = Mean(window)
|
||||
return rolling(r, a)
|
||||
|
||||
def rolling_slope(np.ndarray a, int window):
|
||||
cdef Slope r = Slope(window)
|
||||
return rolling(r, a)
|
||||
|
||||
def rolling_rsquare(np.ndarray a, int window):
|
||||
cdef Rsquare r = Rsquare(window)
|
||||
return rolling(r, a)
|
||||
|
||||
def rolling_resi(np.ndarray a, int window):
|
||||
cdef Resi r = Resi(window)
|
||||
return rolling(r, a)
|
||||
229
qlib/data/base.py
Normal file
229
qlib/data/base.py
Normal file
@@ -0,0 +1,229 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import abc
|
||||
import six
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class Expression(object):
|
||||
"""Expression base class"""
|
||||
|
||||
def __str__(self):
|
||||
return type(self).__name__
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
def __gt__(self, other):
|
||||
from .ops import Gt
|
||||
|
||||
return Gt(self, other)
|
||||
|
||||
def __ge__(self, other):
|
||||
from .ops import Ge
|
||||
|
||||
return Ge(self, other)
|
||||
|
||||
def __lt__(self, other):
|
||||
from .ops import Lt
|
||||
|
||||
return Lt(self, other)
|
||||
|
||||
def __le__(self, other):
|
||||
from .ops import Le
|
||||
|
||||
return Le(self, other)
|
||||
|
||||
def __eq__(self, other):
|
||||
from .ops import Eq
|
||||
|
||||
return Eq(self, other)
|
||||
|
||||
def __ne__(self, other):
|
||||
from .ops import Ne
|
||||
|
||||
return Ne(self, other)
|
||||
|
||||
def __add__(self, other):
|
||||
from .ops import Add
|
||||
|
||||
return Add(self, other)
|
||||
|
||||
def __radd__(self, other):
|
||||
from .ops import Add
|
||||
|
||||
return Add(other, self)
|
||||
|
||||
def __sub__(self, other):
|
||||
from .ops import Sub
|
||||
|
||||
return Sub(self, other)
|
||||
|
||||
def __rsub__(self, other):
|
||||
from .ops import Sub
|
||||
|
||||
return Sub(other, self)
|
||||
|
||||
def __mul__(self, other):
|
||||
from .ops import Mul
|
||||
|
||||
return Mul(self, other)
|
||||
|
||||
def __rmul__(self, other):
|
||||
from .ops import Mul
|
||||
|
||||
return Mul(self, other)
|
||||
|
||||
def __div__(self, other):
|
||||
from .ops import Div
|
||||
|
||||
return Div(self, other)
|
||||
|
||||
def __rdiv__(self, other):
|
||||
from .ops import Div
|
||||
|
||||
return Div(other, self)
|
||||
|
||||
def __truediv__(self, other):
|
||||
from .ops import Div
|
||||
|
||||
return Div(self, other)
|
||||
|
||||
def __rtruediv__(self, other):
|
||||
from .ops import Div
|
||||
|
||||
return Div(other, self)
|
||||
|
||||
def __pow__(self, other):
|
||||
from .ops import Power
|
||||
|
||||
return Power(self, other)
|
||||
|
||||
def __and__(self, other):
|
||||
from .ops import And
|
||||
|
||||
return And(self, other)
|
||||
|
||||
def __rand__(self, other):
|
||||
from .ops import And
|
||||
|
||||
return And(other, self)
|
||||
|
||||
def __or__(self, other):
|
||||
from .ops import Or
|
||||
|
||||
return Or(self, other)
|
||||
|
||||
def __ror__(self, other):
|
||||
from .ops import Or
|
||||
|
||||
return Or(other, self)
|
||||
|
||||
def load(self, instrument, start_index, end_index, freq):
|
||||
"""load feature
|
||||
|
||||
Parameters
|
||||
----------
|
||||
instrument : str
|
||||
instrument code
|
||||
start_index : str
|
||||
feature start index [in calendar]
|
||||
end_index : str
|
||||
feature end index [in calendar]
|
||||
freq : str
|
||||
feature frequency
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Series
|
||||
feature series: The index of the series is the calendar index
|
||||
"""
|
||||
from .cache import H
|
||||
|
||||
# cache
|
||||
args = str(self), instrument, start_index, end_index, freq
|
||||
if args in H["f"]:
|
||||
return H["f"][args]
|
||||
if start_index is None or end_index is None or start_index > end_index:
|
||||
raise ValueError("Invalid index range: {} {}".format(start_index, end_index))
|
||||
series = self._load_internal(instrument, start_index, end_index, freq)
|
||||
series.name = str(self)
|
||||
H["f"][args] = series
|
||||
return series
|
||||
|
||||
@abc.abstractmethod
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_longest_back_rolling(self):
|
||||
"""Get the longest length of historical data the feature has accessed
|
||||
|
||||
This is designed for getting the needed range of the data to calculate
|
||||
the features in specific range at first. However, situations like
|
||||
Ref(Ref($close, -1), 1) can not be handled rightly.
|
||||
|
||||
So this will only used for detecting the length of historical data needed.
|
||||
"""
|
||||
# TODO: forward operator like Ref($close, -1) is not supported yet.
|
||||
raise NotImplementedError("This function must be implemented in your newly defined feature")
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_extended_window_size(self):
|
||||
"""get_extend_window_size
|
||||
|
||||
For to calculate this Operator in range[start_index, end_index]
|
||||
We have to get the *leaf feature* in
|
||||
range[start_index - lft_etd, end_index + rght_etd].
|
||||
|
||||
Returns
|
||||
----------
|
||||
(int, int)
|
||||
lft_etd, rght_etd
|
||||
"""
|
||||
raise NotImplementedError("This function must be implemented in your newly defined feature")
|
||||
|
||||
|
||||
class Feature(Expression):
|
||||
"""Static Expression
|
||||
|
||||
This kind of feature will load data from provider
|
||||
"""
|
||||
|
||||
def __init__(self, name=None):
|
||||
if name:
|
||||
self._name = name.lower()
|
||||
else:
|
||||
self._name = type(self).__name__.lower()
|
||||
|
||||
def __str__(self):
|
||||
return "$" + self._name
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
|
||||
# load
|
||||
from .data import FeatureD
|
||||
|
||||
return FeatureD.feature(instrument, str(self), start_index, end_index, freq)
|
||||
|
||||
def get_longest_back_rolling(self):
|
||||
return 0
|
||||
|
||||
def get_extended_window_size(self):
|
||||
return 0, 0
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class ExpressionOps(Expression):
|
||||
"""Operator Expression
|
||||
|
||||
This kind of feature will use operator for feature
|
||||
construction on the fly.
|
||||
"""
|
||||
|
||||
pass
|
||||
1149
qlib/data/cache.py
Normal file
1149
qlib/data/cache.py
Normal file
File diff suppressed because it is too large
Load Diff
102
qlib/data/client.py
Normal file
102
qlib/data/client.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import socketio
|
||||
|
||||
from .. import __version__
|
||||
from ..log import get_module_logger
|
||||
import pickle
|
||||
|
||||
|
||||
class Client(object):
|
||||
"""A client class
|
||||
|
||||
Provide the connection tool functions for ClientProvider.
|
||||
"""
|
||||
|
||||
def __init__(self, host, port):
|
||||
super(Client, self).__init__()
|
||||
self.sio = socketio.Client()
|
||||
self.server_host = host
|
||||
self.server_port = port
|
||||
self.logger = get_module_logger(self.__class__.__name__)
|
||||
# bind connect/disconnect callbacks
|
||||
self.sio.on(
|
||||
"connect",
|
||||
lambda: self.logger.debug("Connect to server {}".format(self.sio.connection_url)),
|
||||
)
|
||||
self.sio.on("disconnect", lambda: self.logger.debug("Disconnect from server!"))
|
||||
|
||||
def connect_server(self):
|
||||
"""Connect to server."""
|
||||
try:
|
||||
self.sio.connect("ws://" + self.server_host + ":" + str(self.server_port))
|
||||
except socketio.exceptions.ConnectionError:
|
||||
self.logger.error("Cannot connect to server - check your network or server status")
|
||||
|
||||
def disconnect(self):
|
||||
"""Disconnect from server."""
|
||||
try:
|
||||
self.sio.eio.disconnect(True)
|
||||
except Exception as e:
|
||||
self.logger.error("Cannot disconnect from server : %s" % e)
|
||||
|
||||
def send_request(self, request_type, request_content, msg_queue, msg_proc_func=None):
|
||||
"""Send a certain request to server.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
request_type : str
|
||||
type of proposed request, 'calendar'/'instrument'/'feature'
|
||||
request_content : dict
|
||||
records the information of the request
|
||||
msg_proc_func : func
|
||||
the function to process the message when receiving response, should have arg `*args`
|
||||
msg_queue: Queue
|
||||
The queue to pass the messsage after callback
|
||||
"""
|
||||
head_info = {"version": __version__}
|
||||
|
||||
def request_callback(*args):
|
||||
"""callback_wrapper
|
||||
|
||||
:param *args: args[0] is the response content
|
||||
"""
|
||||
# args[0] is the response content
|
||||
self.logger.debug("receive data and enter queue")
|
||||
msg = dict(args[0])
|
||||
if msg["detailed_info"] is not None:
|
||||
if msg["status"] != 0:
|
||||
self.logger.error(msg["detailed_info"])
|
||||
else:
|
||||
self.logger.info(msg["detailed_info"])
|
||||
if msg["status"] != 0:
|
||||
ex = ValueError(f"Bad response(status=={msg['status']}), detailed info: {msg['detailed_info']}")
|
||||
msg_queue.put(ex)
|
||||
else:
|
||||
if msg_proc_func is not None:
|
||||
try:
|
||||
ret = msg_proc_func(msg["result"])
|
||||
except Exception as e:
|
||||
self.logger.exception("Error when processing message.")
|
||||
ret = e
|
||||
else:
|
||||
ret = msg["result"]
|
||||
msg_queue.put(ret)
|
||||
self.disconnect()
|
||||
self.logger.debug("disconnected")
|
||||
|
||||
self.logger.debug("try connecting")
|
||||
self.connect_server()
|
||||
self.logger.debug("connected")
|
||||
# The pickle is for passing some parameters with special type(such as
|
||||
# pd.Timestamp)
|
||||
request_content = {"head": head_info, "body": pickle.dumps(request_content)}
|
||||
self.sio.on(request_type + "_response", request_callback)
|
||||
self.logger.debug("try sending")
|
||||
self.sio.emit(request_type + "_request", request_content)
|
||||
self.sio.wait()
|
||||
1110
qlib/data/data.py
Normal file
1110
qlib/data/data.py
Normal file
File diff suppressed because it is too large
Load Diff
375
qlib/data/filter.py
Normal file
375
qlib/data/filter.py
Normal file
@@ -0,0 +1,375 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
from __future__ import print_function
|
||||
from abc import abstractmethod
|
||||
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import six
|
||||
import abc
|
||||
|
||||
from .data import Cal, DatasetD
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class BaseDFilter(object):
|
||||
"""Dynamic Instruments Filter Abstract class
|
||||
|
||||
Users can override this class to construct their own filter
|
||||
|
||||
Override __init__ to input filter regulations
|
||||
|
||||
Override filter_main to use the regulations to filter instruments
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def from_config(config):
|
||||
"""Construct an instance from config dict.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
config : dict
|
||||
dict of config parameters
|
||||
"""
|
||||
raise NotImplementedError("Subclass of BaseDFilter must reimplement `from_config` method")
|
||||
|
||||
@abstractmethod
|
||||
def to_config(self):
|
||||
"""Construct an instance from config dict.
|
||||
|
||||
Returns
|
||||
----------
|
||||
dict
|
||||
return the dict of config parameters
|
||||
"""
|
||||
raise NotImplementedError("Subclass of BaseDFilter must reimplement `to_config` method")
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class SeriesDFilter(BaseDFilter):
|
||||
"""Dynamic Instruments Filter Abstract class to filter a series of certain features
|
||||
|
||||
Filters should provide parameters:
|
||||
|
||||
- filter start time
|
||||
- filter end time
|
||||
- filter rule
|
||||
|
||||
Override __init__ to assign a certain rule to filter the series.
|
||||
|
||||
Override _getFilterSeries to use the rule to filter the series and get a dict of {inst => series}, or override filter_main for more advanced series filter rule
|
||||
"""
|
||||
|
||||
def __init__(self, fstart_time=None, fend_time=None):
|
||||
"""Init function for filter base class.
|
||||
Filter a set of instruments based on a certain rule within a certain period assigned by fstart_time and fend_time.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fstart_time: str
|
||||
the time for the filter rule to start filter the instruments
|
||||
fend_time: str
|
||||
the time for the filter rule to stop filter the instruments
|
||||
"""
|
||||
super(SeriesDFilter, self).__init__()
|
||||
self.filter_start_time = pd.Timestamp(fstart_time) if fstart_time else None
|
||||
self.filter_end_time = pd.Timestamp(fend_time) if fend_time else None
|
||||
|
||||
def _getTimeBound(self, instruments):
|
||||
"""Get time bound for all instruments.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
instruments: dict
|
||||
the dict of instruments in the form {instrument_name => list of timestamp tuple}
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Timestamp, pd.Timestamp
|
||||
the lower time bound and upper time bound of all the instruments
|
||||
"""
|
||||
trange = Cal.calendar(freq=self.filter_freq)
|
||||
ubound, lbound = trange[0], trange[-1]
|
||||
for _, timestamp in instruments.items():
|
||||
if timestamp:
|
||||
lbound = timestamp[0][0] if timestamp[0][0] < lbound else lbound
|
||||
ubound = timestamp[-1][-1] if timestamp[-1][-1] > ubound else ubound
|
||||
return lbound, ubound
|
||||
|
||||
def _toSeries(self, time_range, target_timestamp):
|
||||
"""Convert the target timestamp to a pandas series of bool value within a time range.
|
||||
Make the time inside the target_timestamp range TRUE, others FALSE.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
time_range : D.calendar
|
||||
the time range of the instruments
|
||||
target_timestamp : list
|
||||
the list of tuple (timestamp, timestamp)
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Series
|
||||
the series of bool value for an instrument
|
||||
"""
|
||||
# Construct a whole dict of {date => bool}
|
||||
timestamp_series = {timestamp: False for timestamp in time_range}
|
||||
# Convert to pd.Series
|
||||
timestamp_series = pd.Series(timestamp_series)
|
||||
# Fill the date within target_timestamp with TRUE
|
||||
for start, end in target_timestamp:
|
||||
timestamp_series[Cal.calendar(start_time=start, end_time=end, freq=self.filter_freq)] = True
|
||||
return timestamp_series
|
||||
|
||||
def _filterSeries(self, timestamp_series, filter_series):
|
||||
"""Filter the timestamp series with filter series by using element-wise AND operation of the two series
|
||||
|
||||
Parameters
|
||||
----------
|
||||
timestamp_series : pd.Series
|
||||
the series of bool value indicating existing time
|
||||
filter_series : pd.Series
|
||||
the series of bool value indicating filter feature
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Series
|
||||
the series of bool value indicating whether the date satisfies the filter condition and exists in target timestamp
|
||||
"""
|
||||
fstart, fend = list(filter_series.keys())[0], list(filter_series.keys())[-1]
|
||||
timestamp_series[fstart:fend] = timestamp_series[fstart:fend] & filter_series
|
||||
return timestamp_series
|
||||
|
||||
def _toTimestamp(self, timestamp_series):
|
||||
"""Convert the timestamp series to a list of tuple (timestamp, timestamp) indicating a continuous range of TRUE
|
||||
|
||||
Parameters
|
||||
----------
|
||||
timestamp_series: pd.Series
|
||||
the series of bool value after being filtered
|
||||
|
||||
Returns
|
||||
----------
|
||||
list
|
||||
the list of tuple (timestamp, timestamp)
|
||||
"""
|
||||
# sort the timestamp_series according to the timestamps
|
||||
timestamp_series.sort_index()
|
||||
timestamp = []
|
||||
_lbool = None
|
||||
_ltime = None
|
||||
for _ts, _bool in timestamp_series.items():
|
||||
# there is likely to be NAN when the filter series don't have the
|
||||
# bool value, so we just change the NAN into False
|
||||
if _bool == np.nan:
|
||||
_bool = False
|
||||
if _lbool is None:
|
||||
_cur_start = _ts
|
||||
_lbool = _bool
|
||||
_ltime = _ts
|
||||
continue
|
||||
if (_lbool, _bool) == (True, False):
|
||||
if _cur_start:
|
||||
timestamp.append((_cur_start, _ltime))
|
||||
elif (_lbool, _bool) == (False, True):
|
||||
_cur_start = _ts
|
||||
_lbool = _bool
|
||||
_ltime = _ts
|
||||
if _lbool:
|
||||
timestamp.append((_cur_start, _ltime))
|
||||
return timestamp
|
||||
|
||||
def __call__(self, instruments, start_time=None, end_time=None, freq="day"):
|
||||
"""Call this filter to get filtered instruments list"""
|
||||
self.filter_freq = freq
|
||||
return self.filter_main(instruments, start_time, end_time)
|
||||
|
||||
@abstractmethod
|
||||
def _getFilterSeries(self, instruments, fstart, fend):
|
||||
"""Get filter series based on the rules assigned during the initialization and the input time range.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
instruments : dict
|
||||
the dict of instruments to be filtered
|
||||
fstart : pd.Timestamp
|
||||
start time of filter
|
||||
fend : pd.Timestamp
|
||||
end time of filter
|
||||
|
||||
.. note:: fstart/fend indicates the intersection of instruments start/end time and filter start/end time
|
||||
|
||||
Returns
|
||||
----------
|
||||
pd.Dataframe
|
||||
a series of {pd.Timestamp => bool}
|
||||
"""
|
||||
raise NotImplementedError("Subclass of SeriesDFilter must reimplement `getFilterSeries` method")
|
||||
|
||||
def filter_main(self, instruments, start_time=None, end_time=None):
|
||||
"""Implement this method to filter the instruments.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
instruments: dict
|
||||
input instruments to be filtered
|
||||
start_time: str
|
||||
start of the time range
|
||||
end_time: str
|
||||
end of the time range
|
||||
|
||||
Returns
|
||||
----------
|
||||
dict
|
||||
filtered instruments, same structure as input instruments
|
||||
"""
|
||||
lbound, ubound = self._getTimeBound(instruments)
|
||||
start_time = pd.Timestamp(start_time or lbound)
|
||||
end_time = pd.Timestamp(end_time or ubound)
|
||||
_instruments_filtered = {}
|
||||
_all_calendar = Cal.calendar(start_time=start_time, end_time=end_time, freq=self.filter_freq)
|
||||
_filter_calendar = Cal.calendar(
|
||||
start_time=self.filter_start_time and max(self.filter_start_time, _all_calendar[0]) or _all_calendar[0],
|
||||
end_time=self.filter_end_time and min(self.filter_end_time, _all_calendar[-1]) or _all_calendar[-1],
|
||||
freq=self.filter_freq,
|
||||
)
|
||||
_all_filter_series = self._getFilterSeries(instruments, _filter_calendar[0], _filter_calendar[-1])
|
||||
for inst, timestamp in instruments.items():
|
||||
# Construct a whole map of date
|
||||
_timestamp_series = self._toSeries(_all_calendar, timestamp)
|
||||
# Get filter series
|
||||
if inst in _all_filter_series:
|
||||
_filter_series = _all_filter_series[inst]
|
||||
else:
|
||||
if self.keep:
|
||||
_filter_series = pd.Series({timestamp: True for timestamp in _filter_calendar})
|
||||
else:
|
||||
_filter_series = pd.Series({timestamp: False for timestamp in _filter_calendar})
|
||||
# Calculate bool value within the range of filter
|
||||
_timestamp_series = self._filterSeries(_timestamp_series, _filter_series)
|
||||
# Reform the map to (start_timestamp, end_timestamp) format
|
||||
_timestamp = self._toTimestamp(_timestamp_series)
|
||||
# Remove empty timestamp
|
||||
if _timestamp:
|
||||
_instruments_filtered[inst] = _timestamp
|
||||
return _instruments_filtered
|
||||
|
||||
|
||||
class NameDFilter(SeriesDFilter):
|
||||
"""Name dynamic instrument filter
|
||||
|
||||
Filter the instruments based on a regulated name format.
|
||||
|
||||
A name rule regular expression is required.
|
||||
"""
|
||||
|
||||
def __init__(self, name_rule_re, fstart_time=None, fend_time=None):
|
||||
"""Init function for name filter class
|
||||
|
||||
params:
|
||||
------
|
||||
name_rule_re: str
|
||||
regular expression for the name rule
|
||||
"""
|
||||
super(NameDFilter, self).__init__(fstart_time, fend_time)
|
||||
self.name_rule_re = name_rule_re
|
||||
|
||||
def _getFilterSeries(self, instruments, fstart, fend):
|
||||
all_filter_series = {}
|
||||
filter_calendar = Cal.calendar(start_time=fstart, end_time=fend, freq=self.filter_freq)
|
||||
for inst, timestamp in instruments.items():
|
||||
if re.match(self.name_rule_re, inst):
|
||||
_filter_series = pd.Series({timestamp: True for timestamp in filter_calendar})
|
||||
else:
|
||||
_filter_series = pd.Series({timestamp: False for timestamp in filter_calendar})
|
||||
all_filter_series[inst] = _filter_series
|
||||
return all_filter_series
|
||||
|
||||
@staticmethod
|
||||
def from_config(config):
|
||||
return NameDFilter(
|
||||
name_rule_re=config["name_rule_re"],
|
||||
fstart_time=config["filter_start_time"],
|
||||
fend_time=config["filter_end_time"],
|
||||
)
|
||||
|
||||
def to_config(self):
|
||||
return {
|
||||
"filter_type": "NameDFilter",
|
||||
"name_rule_re": self.name_rule_re,
|
||||
"filter_start_time": str(self.filter_start_time) if self.filter_start_time else self.filter_start_time,
|
||||
"filter_end_time": str(self.filter_end_time) if self.filter_end_time else self.filter_end_time,
|
||||
}
|
||||
|
||||
|
||||
class ExpressionDFilter(SeriesDFilter):
|
||||
"""Expression dynamic instrument filter
|
||||
|
||||
Filter the instruments based on a certain expression.
|
||||
|
||||
An expression rule indicating a certain feature field is required.
|
||||
|
||||
Examples
|
||||
----------
|
||||
- *basic features filter* : rule_expression = '$close/$open>5'
|
||||
- *cross-sectional features filter* : rule_expression = '$rank($close)<10'
|
||||
- *time-sequence features filter* : rule_expression = '$Ref($close, 3)>100'
|
||||
"""
|
||||
|
||||
def __init__(self, rule_expression, fstart_time=None, fend_time=None, keep=False):
|
||||
"""Init function for expression filter class
|
||||
|
||||
params:
|
||||
------
|
||||
fstart_time: str
|
||||
filter the feature starting from this time
|
||||
fend_time: str
|
||||
filter the feature ending by this time
|
||||
rule_expression: str
|
||||
an input expression for the rule
|
||||
keep: bool
|
||||
whether to keep the instruments of which features don't exist in the filter time span
|
||||
"""
|
||||
super(ExpressionDFilter, self).__init__(fstart_time, fend_time)
|
||||
self.rule_expression = rule_expression
|
||||
self.keep = keep
|
||||
|
||||
def _getFilterSeries(self, instruments, fstart, fend):
|
||||
# do not use dataset cache
|
||||
try:
|
||||
_features = DatasetD.dataset(
|
||||
instruments,
|
||||
[self.rule_expression],
|
||||
fstart,
|
||||
fend,
|
||||
freq=self.filter_freq,
|
||||
disk_cache=0,
|
||||
)
|
||||
except TypeError:
|
||||
# use LocalDatasetProvider
|
||||
_features = DatasetD.dataset(instruments, [self.rule_expression], fstart, fend, freq=self.filter_freq)
|
||||
rule_expression_field_name = list(_features.keys())[0]
|
||||
all_filter_series = _features[rule_expression_field_name]
|
||||
return all_filter_series
|
||||
|
||||
def from_config(config):
|
||||
return ExpressionDFilter(
|
||||
rule_expression=config["rule_expression"],
|
||||
fstart_time=config["filter_start_time"],
|
||||
fend_time=config["filter_end_time"],
|
||||
keep=config["keep"],
|
||||
)
|
||||
|
||||
def to_config(self):
|
||||
return {
|
||||
"filter_type": "ExpressionDFilter",
|
||||
"rule_expression": self.rule_expression,
|
||||
"filter_start_time": str(self.filter_start_time) if self.filter_start_time else self.filter_start_time,
|
||||
"filter_end_time": str(self.filter_end_time) if self.filter_end_time else self.filter_end_time,
|
||||
"keep": self.keep,
|
||||
}
|
||||
1405
qlib/data/ops.py
Normal file
1405
qlib/data/ops.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user