mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-05 12:00:58 +08:00
release-0.5.0 (#1)
* init commit * change the version number * enrich the docs & fix cache docs * update index readme * Modify cache class name * Modify sharpe to information_ratio * Modify Group- to Group * add the description of graphical results & fix the backtest docs * fix docs in details * update docs * Update introduction.rst * Update README.md * Update introduction.rst * Update introduction.rst * Update introduction.rst * Update installation.rst * Update installation.rst * Update initialization.rst * Update getdata.rst * Update integration.rst * Update initialization.rst * Update getdata.rst * Update estimator.rst Modify some typos. * Update README.md Modify the typos. * Update initialization.rst * Update data.rst * Update report.rst * Update estimator.rst * Update cumulative_return.py * Update model.rst * Update rank_label.py * Update cumulative_return.py * Update strategy.rst * Update getdata.rst * Update backtest.rst * Update integration.rst * Update getdata.rst * Update introduction.rst * Update introduction.rst * Update README.md * Update report.rst * Update integration.rst Fix typos * Update installation.rst Fix typos * Update getdata.rst * Update initialization.rst Fix typos.
* add quick start docs & fix details * fix estimator docs & fix strategy docs * fix the cache in data.rst * update documents * Fix Corr && Rsquare * fix data retrieval example to csi300 & fix a data bug * fix filter bug * Fix data collector * Modify model args * add the log & fix README.md\quick.rst * add environment dependencies & add introduction of qlib-server online mode * fix image center format & set log_only of docs to True * fix README.md format * update data preparation & readme logo image * get_data support version * Modify analysis names * Modify analysis graph * update report.rst & data.rst * commit estimator for merge * minimal requirements * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * update estimator * Fix doc urls * fix get_data.py docstring * update test_get_data.py * Update docs * Update docs * Update docs Co-authored-by: bxdd <bxddream@gmail.com> Co-authored-by: zhupr <zhu.pengrong@foxmail.com> Co-authored-by: Wendi Li <wendili.academic@qq.com> Co-authored-by: Dingsu Wang <dingsu.wang@gmail.com> Co-authored-by: bxdd <45119470+bxdd@users.noreply.github.com> Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com>
This commit is contained in:
@@ -28,9 +28,9 @@ from .data import (
|
||||
from .cache import (
|
||||
ExpressionCache,
|
||||
DatasetCache,
|
||||
ServerExpressionCache,
|
||||
ServerDatasetCache,
|
||||
DiskExpressionCache,
|
||||
DiskDatasetCache,
|
||||
SimpleDatasetCache,
|
||||
ClientDatasetCache,
|
||||
ClientCalendarCache,
|
||||
DatasetURICache,
|
||||
MemoryCalendarCache,
|
||||
)
|
||||
|
||||
@@ -385,11 +385,11 @@ class DatasetCache(BaseProviderCache):
|
||||
return instruments, fields, freq
|
||||
|
||||
|
||||
class ServerExpressionCache(ExpressionCache):
|
||||
class DiskExpressionCache(ExpressionCache):
|
||||
"""Prepared cache mechanism for server."""
|
||||
|
||||
def __init__(self, provider, **kwargs):
|
||||
super(ServerExpressionCache, self).__init__(provider)
|
||||
super(DiskExpressionCache, self).__init__(provider)
|
||||
self.r = get_redis_connection()
|
||||
# remote==True means client is using this module, writing behaviour will not be allowed.
|
||||
self.remote = kwargs.get("remote", False)
|
||||
@@ -575,11 +575,11 @@ class ServerExpressionCache(ExpressionCache):
|
||||
return 0
|
||||
|
||||
|
||||
class ServerDatasetCache(DatasetCache):
|
||||
class DiskDatasetCache(DatasetCache):
|
||||
"""Prepared cache mechanism for server."""
|
||||
|
||||
def __init__(self, provider, **kwargs):
|
||||
super(ServerDatasetCache, self).__init__(provider)
|
||||
super(DiskDatasetCache, self).__init__(provider)
|
||||
self.r = get_redis_connection()
|
||||
self.remote = kwargs.get("remote", False)
|
||||
if self.remote:
|
||||
@@ -612,7 +612,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
:return:
|
||||
"""
|
||||
|
||||
im = ServerDatasetCache.IndexManager(cache_path)
|
||||
im = DiskDatasetCache.IndexManager(cache_path)
|
||||
index_data = im.get_index(start_time, end_time)
|
||||
if index_data.shape[0] > 0:
|
||||
start, stop = (
|
||||
@@ -625,9 +625,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
with pd.HDFStore(cache_path, mode="r") as store:
|
||||
if "/{}".format(im.KEY) in store.keys():
|
||||
df = store.select(key=im.KEY, start=start, stop=stop)
|
||||
df.reset_index(inplace=True)
|
||||
df.set_index(["instrument", "datetime"], inplace=True)
|
||||
df.sort_index(inplace=True)
|
||||
df = df.swaplevel("datetime", "instrument").sort_index()
|
||||
# read cache and need to replace not-space fields to field
|
||||
df = cls.cache_to_origin_data(df, fields)
|
||||
|
||||
@@ -684,10 +682,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
freq=freq,
|
||||
)
|
||||
if not features.empty:
|
||||
features.reset_index(inplace=True)
|
||||
features.set_index(["datetime", "instrument"], inplace=True)
|
||||
features.sort_index(inplace=True)
|
||||
features = features.loc[start_time:end_time]
|
||||
features = features.sort_index().loc(axis=0)[:, start_time:end_time]
|
||||
return features
|
||||
|
||||
def _dataset_uri(
|
||||
@@ -851,11 +846,11 @@ class ServerDatasetCache(DatasetCache):
|
||||
|
||||
features = self.provider.dataset(instruments, fields, _calendar[0], _calendar[-1], freq)
|
||||
|
||||
# sort index by datetime
|
||||
if not features.empty:
|
||||
features.reset_index(inplace=True)
|
||||
features.set_index(["datetime", "instrument"], inplace=True)
|
||||
features.sort_index(inplace=True)
|
||||
if features.empty:
|
||||
return features
|
||||
|
||||
# swap index and sorted
|
||||
features = features.swaplevel("instrument", "datetime").sort_index()
|
||||
|
||||
# write cache data
|
||||
with pd.HDFStore(cache_path + ".data") as store:
|
||||
@@ -881,7 +876,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
pickle.dump(meta, f)
|
||||
os.chmod(cache_path + ".meta", stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
|
||||
# write index file
|
||||
im = ServerDatasetCache.IndexManager(cache_path)
|
||||
im = DiskDatasetCache.IndexManager(cache_path)
|
||||
index_data = im.build_index_from_data(features)
|
||||
im.update(index_data)
|
||||
|
||||
@@ -890,7 +885,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
# temporarily
|
||||
os.replace(cache_path + ".data", cache_path)
|
||||
# the fields of the cached features are converted to the original fields
|
||||
return features
|
||||
return features.swaplevel("datetime", "instrument")
|
||||
|
||||
def update(self, cache_uri):
|
||||
cp_cache_uri = os.path.join(self.dtst_cache_path, cache_uri)
|
||||
@@ -900,7 +895,7 @@ class ServerDatasetCache(DatasetCache):
|
||||
self.clear_cache(cp_cache_uri)
|
||||
return 2
|
||||
|
||||
im = ServerDatasetCache.IndexManager(cp_cache_uri)
|
||||
im = DiskDatasetCache.IndexManager(cp_cache_uri)
|
||||
with CacheUtils.writer_lock(self.r, "dataset-%s" % cache_uri):
|
||||
with open(cp_cache_uri + ".meta", "rb") as f:
|
||||
d = pickle.load(f)
|
||||
@@ -1061,11 +1056,11 @@ class SimpleDatasetCache(DatasetCache):
|
||||
return self.cache_to_origin_data(data, fields)
|
||||
|
||||
|
||||
class ClientDatasetCache(DatasetCache):
|
||||
class DatasetURICache(DatasetCache):
|
||||
"""Prepared cache mechanism for server."""
|
||||
|
||||
def __init__(self, provider):
|
||||
super(ClientDatasetCache, self).__init__(provider)
|
||||
super(DatasetURICache, self).__init__(provider)
|
||||
|
||||
def _uri(self, instruments, fields, start_time, end_time, freq, disk_cache=1, **kwargs):
|
||||
return hash_args(*self.normalize_uri_args(instruments, fields, freq), disk_cache)
|
||||
@@ -1117,7 +1112,7 @@ class ClientDatasetCache(DatasetCache):
|
||||
get_module_logger("cache").debug(f"get feature from {C.dataset_provider}")
|
||||
else:
|
||||
mnt_feature_uri = os.path.join(C.mount_path, C.dataset_cache_dir_name, feature_uri)
|
||||
df = ServerDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
|
||||
df = DiskDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
|
||||
get_module_logger("cache").debug("get feature from uri cache")
|
||||
|
||||
return df
|
||||
@@ -1127,7 +1122,7 @@ class CalendarCache(BaseProviderCache):
|
||||
pass
|
||||
|
||||
|
||||
class ClientCalendarCache(CalendarCache):
|
||||
class MemoryCalendarCache(CalendarCache):
|
||||
def calendar(self, start_time=None, end_time=None, freq="day", future=False):
|
||||
uri = self._uri(start_time, end_time, freq, future)
|
||||
result, expire = MemCacheExpire.get_cache(H["c"], uri)
|
||||
|
||||
@@ -24,7 +24,7 @@ from .ops import *
|
||||
from ..log import get_module_logger
|
||||
from ..utils import parse_field, read_bin, hash_args, normalize_cache_fields
|
||||
from .base import Feature
|
||||
from .cache import ServerDatasetCache, ServerExpressionCache
|
||||
from .cache import DiskDatasetCache, DiskExpressionCache
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
@@ -357,7 +357,7 @@ class DatasetProvider(object):
|
||||
whether to skip(0)/use(1)/replace(2) disk_cache
|
||||
|
||||
"""
|
||||
return ServerDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache)
|
||||
return DiskDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache)
|
||||
|
||||
@staticmethod
|
||||
def get_instruments_d(instruments, freq):
|
||||
@@ -452,7 +452,7 @@ class DatasetProvider(object):
|
||||
|
||||
if len(new_data) > 0:
|
||||
data = pd.concat(new_data, names=["instrument"], sort=False)
|
||||
data = ServerDatasetCache.cache_to_origin_data(data, column_names)
|
||||
data = DiskDatasetCache.cache_to_origin_data(data, column_names)
|
||||
else:
|
||||
data = pd.DataFrame(columns=column_names)
|
||||
|
||||
@@ -915,7 +915,7 @@ class ClientDatasetProvider(DatasetProvider):
|
||||
try:
|
||||
# pre-mounted nfs, used for demo
|
||||
mnt_feature_uri = os.path.join(C.mount_path, C.dataset_cache_dir_name, feature_uri)
|
||||
df = ServerDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
|
||||
df = DiskDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
|
||||
get_module_logger("data").debug("finish slicing data")
|
||||
if return_uri:
|
||||
return df, feature_uri
|
||||
|
||||
@@ -142,6 +142,7 @@ class SeriesDFilter(BaseDFilter):
|
||||
the series of bool value indicating whether the date satisfies the filter condition and exists in target timestamp
|
||||
"""
|
||||
fstart, fend = list(filter_series.keys())[0], list(filter_series.keys())[-1]
|
||||
filter_series = filter_series.astype('bool') # Make sure the filter_series is boolean
|
||||
timestamp_series[fstart:fend] = timestamp_series[fstart:fend] & filter_series
|
||||
return timestamp_series
|
||||
|
||||
|
||||
@@ -914,10 +914,7 @@ class IdxMax(Rolling):
|
||||
if self.N == 0:
|
||||
series = series.expanding(min_periods=1).apply(lambda x: x.argmax() + 1, raw=True)
|
||||
else:
|
||||
series = series.rolling(self.N, min_periods=1).apply(
|
||||
lambda x: x.argmax() + 1,
|
||||
raw=True,
|
||||
)
|
||||
series = series.rolling(self.N, min_periods=1).apply(lambda x: x.argmax() + 1, raw=True)
|
||||
return series
|
||||
|
||||
|
||||
@@ -965,10 +962,7 @@ class IdxMin(Rolling):
|
||||
if self.N == 0:
|
||||
series = series.expanding(min_periods=1).apply(lambda x: x.argmin() + 1, raw=True)
|
||||
else:
|
||||
series = series.rolling(self.N, min_periods=1).apply(
|
||||
lambda x: x.argmin() + 1,
|
||||
raw=True,
|
||||
)
|
||||
series = series.rolling(self.N, min_periods=1).apply(lambda x: x.argmin() + 1, raw=True)
|
||||
return series
|
||||
|
||||
|
||||
@@ -1194,11 +1188,12 @@ class Rsquare(Rolling):
|
||||
super(Rsquare, self).__init__(feature, N, "rsquare")
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
|
||||
series = self.feature.load(instrument, start_index, end_index, freq)
|
||||
_series = self.feature.load(instrument, start_index, end_index, freq)
|
||||
if self.N == 0:
|
||||
series = pd.Series(expanding_rsquare(series.values), index=series.index)
|
||||
series = pd.Series(expanding_rsquare(_series.values), index=_series.index)
|
||||
else:
|
||||
series = pd.Series(rolling_rsquare(series.values, self.N), index=series.index)
|
||||
series = pd.Series(rolling_rsquare(_series.values, self.N), index=_series.index)
|
||||
series.loc[np.isclose(_series.rolling(self.N, min_periods=1).std(), 0, atol=2e-05)] = np.nan
|
||||
return series
|
||||
|
||||
|
||||
@@ -1341,12 +1336,7 @@ class PairRolling(ExpressionOps):
|
||||
if self.N == 0:
|
||||
return np.inf
|
||||
return (
|
||||
max(
|
||||
self.feature_left.get_longest_back_rolling(),
|
||||
self.feature_right.get_longest_back_rolling(),
|
||||
)
|
||||
+ self.N
|
||||
- 1
|
||||
max(self.feature_left.get_longest_back_rolling(), self.feature_right.get_longest_back_rolling()) + self.N - 1
|
||||
)
|
||||
|
||||
def get_extended_window_size(self):
|
||||
@@ -1382,6 +1372,18 @@ class Corr(PairRolling):
|
||||
def __init__(self, feature_left, feature_right, N):
|
||||
super(Corr, self).__init__(feature_left, feature_right, N, "corr")
|
||||
|
||||
def _load_internal(self, instrument, start_index, end_index, freq):
    """Load the pairwise correlation and mask windows with near-constant input.

    The correlation itself is computed by the parent class. Afterwards every
    position where the windowed standard deviation of either input series is
    within ``2e-05`` of zero is overwritten with NaN.

    Parameters
    ----------
    instrument, start_index, end_index, freq
        Forwarded unchanged to the parent ``_load_internal`` and to the
        ``load`` method of both input features.

    Returns
    -------
    The series returned by the parent class, with the masked positions set
    to NaN.
    """
    res = super(Corr, self)._load_internal(instrument, start_index, end_index, freq)

    # NOTE: Load uses MemCache, so calling load again will not cause performance degradation
    series_left = self.feature_left.load(instrument, start_index, end_index, freq)
    series_right = self.feature_right.load(instrument, start_index, end_index, freq)

    def _window_std(series):
        # N == 0 denotes an expanding window elsewhere in this module, so the
        # mask must use one too; a zero-width rolling window is not equivalent.
        if self.N == 0:
            return series.expanding(min_periods=1).std()
        return series.rolling(self.N, min_periods=1).std()

    left_is_flat = np.isclose(_window_std(series_left), 0, atol=2e-05)
    right_is_flat = np.isclose(_window_std(series_right), 0, atol=2e-05)
    res.loc[left_is_flat | right_is_flat] = np.nan
    return res
|
||||
|
||||
|
||||
class Cov(PairRolling):
|
||||
"""Rolling Covariance
|
||||
@@ -1403,3 +1405,4 @@ class Cov(PairRolling):
|
||||
|
||||
def __init__(self, feature_left, feature_right, N):
|
||||
super(Cov, self).__init__(feature_left, feature_right, N, "cov")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user