1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-05 12:00:58 +08:00

release-0.5.0 (#1)

* init commit

* change the version number

* enrich the docs & fix cache docs

* update index readme

* Modify cache class name

* Modify sharpe to information_ratio

* Modify Group- to Group

* add the description of graphical results & fix the backtest docs

* fix docs in details

* update docs

* Update introduction.rst

* Update README.md

* Update introduction.rst

* Update introduction.rst

* Update introduction.rst

* Update installation.rst

* Update installation.rst

* Update initialization.rst

* Update getdata.rst

* Update integration.rst

* Update initialization.rst

* Update getdata.rst

* Update estimator.rst

Modify some typos.

* Update README.md

Modify the typos.

* Update initialization.rst

* Update data.rst

* Update report.rst

* Update estimator.rst

* Update cumulative_return.py

* Update model.rst

* Update rank_label.py

* Update cumulative_return.py

* Update strategy.rst

* Update getdata.rst

* Update backtest.rst

* Update integration.rst

* Update getdata.rst

* Update introduction.rst

* Update introduction.rst

* Update README.md

* Update report.rst

* Update integration.rst

Fix typos

* Update installation.rst

Fix typos

* Update getdata.rst

* Update initialization.rst

Fix typos.

* add quick start docs & fix details

* fix estimator docs & fix strategy docs

* fix the cache in data.rst

* update documents

* Fix Corr && Rsquare

* fix data retrieval example to csi300 & fix a data bug

* fix filter bug

* Fix data collector

* Modify model args

* add the log & fix README.md\quick.rst

* add environment dependencies & add introduction of qlib-server online mode

* fix image center format & set log_only of docs to True

* fix README.md format

* update data preparation & readme logo image

* get_data support version

* Modify analysis names

* Modify analysis graph

* update report.rst & data.rst

* commit estimator for merge

* minimal requirements

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* update estimator

* Fix doc urls

* fix get_data.py docstring

* update test_get_data.py

* Update docs

* Update docs

* Update docs

Co-authored-by: bxdd <bxddream@gmail.com>
Co-authored-by: zhupr <zhu.pengrong@foxmail.com>
Co-authored-by: Wendi Li <wendili.academic@qq.com>
Co-authored-by: Dingsu Wang <dingsu.wang@gmail.com>
Co-authored-by: bxdd <45119470+bxdd@users.noreply.github.com>
Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com>
This commit is contained in:
you-n-g
2020-09-23 23:01:39 -05:00
committed by GitHub
parent 99ebd87cba
commit de9e13b171
82 changed files with 1580 additions and 1145 deletions

View File

@@ -28,9 +28,9 @@ from .data import (
from .cache import (
ExpressionCache,
DatasetCache,
ServerExpressionCache,
ServerDatasetCache,
DiskExpressionCache,
DiskDatasetCache,
SimpleDatasetCache,
ClientDatasetCache,
ClientCalendarCache,
DatasetURICache,
MemoryCalendarCache,
)

View File

@@ -385,11 +385,11 @@ class DatasetCache(BaseProviderCache):
return instruments, fields, freq
class ServerExpressionCache(ExpressionCache):
class DiskExpressionCache(ExpressionCache):
"""Prepared cache mechanism for server."""
def __init__(self, provider, **kwargs):
super(ServerExpressionCache, self).__init__(provider)
super(DiskExpressionCache, self).__init__(provider)
self.r = get_redis_connection()
# remote==True means client is using this module, writing behaviour will not be allowed.
self.remote = kwargs.get("remote", False)
@@ -575,11 +575,11 @@ class ServerExpressionCache(ExpressionCache):
return 0
class ServerDatasetCache(DatasetCache):
class DiskDatasetCache(DatasetCache):
"""Prepared cache mechanism for server."""
def __init__(self, provider, **kwargs):
super(ServerDatasetCache, self).__init__(provider)
super(DiskDatasetCache, self).__init__(provider)
self.r = get_redis_connection()
self.remote = kwargs.get("remote", False)
if self.remote:
@@ -612,7 +612,7 @@ class ServerDatasetCache(DatasetCache):
:return:
"""
im = ServerDatasetCache.IndexManager(cache_path)
im = DiskDatasetCache.IndexManager(cache_path)
index_data = im.get_index(start_time, end_time)
if index_data.shape[0] > 0:
start, stop = (
@@ -625,9 +625,7 @@ class ServerDatasetCache(DatasetCache):
with pd.HDFStore(cache_path, mode="r") as store:
if "/{}".format(im.KEY) in store.keys():
df = store.select(key=im.KEY, start=start, stop=stop)
df.reset_index(inplace=True)
df.set_index(["instrument", "datetime"], inplace=True)
df.sort_index(inplace=True)
df = df.swaplevel("datetime", "instrument").sort_index()
# read cache and need to replace not-space fields to field
df = cls.cache_to_origin_data(df, fields)
@@ -684,10 +682,7 @@ class ServerDatasetCache(DatasetCache):
freq=freq,
)
if not features.empty:
features.reset_index(inplace=True)
features.set_index(["datetime", "instrument"], inplace=True)
features.sort_index(inplace=True)
features = features.loc[start_time:end_time]
features = features.sort_index().loc(axis=0)[:, start_time:end_time]
return features
def _dataset_uri(
@@ -851,11 +846,11 @@ class ServerDatasetCache(DatasetCache):
features = self.provider.dataset(instruments, fields, _calendar[0], _calendar[-1], freq)
# sort index by datetime
if not features.empty:
features.reset_index(inplace=True)
features.set_index(["datetime", "instrument"], inplace=True)
features.sort_index(inplace=True)
if features.empty:
return features
# swap index and sorted
features = features.swaplevel("instrument", "datetime").sort_index()
# write cache data
with pd.HDFStore(cache_path + ".data") as store:
@@ -881,7 +876,7 @@ class ServerDatasetCache(DatasetCache):
pickle.dump(meta, f)
os.chmod(cache_path + ".meta", stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
# write index file
im = ServerDatasetCache.IndexManager(cache_path)
im = DiskDatasetCache.IndexManager(cache_path)
index_data = im.build_index_from_data(features)
im.update(index_data)
@@ -890,7 +885,7 @@ class ServerDatasetCache(DatasetCache):
# temporarily
os.replace(cache_path + ".data", cache_path)
# the fields of the cached features are converted to the original fields
return features
return features.swaplevel("datetime", "instrument")
def update(self, cache_uri):
cp_cache_uri = os.path.join(self.dtst_cache_path, cache_uri)
@@ -900,7 +895,7 @@ class ServerDatasetCache(DatasetCache):
self.clear_cache(cp_cache_uri)
return 2
im = ServerDatasetCache.IndexManager(cp_cache_uri)
im = DiskDatasetCache.IndexManager(cp_cache_uri)
with CacheUtils.writer_lock(self.r, "dataset-%s" % cache_uri):
with open(cp_cache_uri + ".meta", "rb") as f:
d = pickle.load(f)
@@ -1061,11 +1056,11 @@ class SimpleDatasetCache(DatasetCache):
return self.cache_to_origin_data(data, fields)
class ClientDatasetCache(DatasetCache):
class DatasetURICache(DatasetCache):
"""Prepared cache mechanism for server."""
def __init__(self, provider):
super(ClientDatasetCache, self).__init__(provider)
super(DatasetURICache, self).__init__(provider)
def _uri(self, instruments, fields, start_time, end_time, freq, disk_cache=1, **kwargs):
return hash_args(*self.normalize_uri_args(instruments, fields, freq), disk_cache)
@@ -1117,7 +1112,7 @@ class ClientDatasetCache(DatasetCache):
get_module_logger("cache").debug(f"get feature from {C.dataset_provider}")
else:
mnt_feature_uri = os.path.join(C.mount_path, C.dataset_cache_dir_name, feature_uri)
df = ServerDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
df = DiskDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
get_module_logger("cache").debug("get feature from uri cache")
return df
@@ -1127,7 +1122,7 @@ class CalendarCache(BaseProviderCache):
pass
class ClientCalendarCache(CalendarCache):
class MemoryCalendarCache(CalendarCache):
def calendar(self, start_time=None, end_time=None, freq="day", future=False):
uri = self._uri(start_time, end_time, freq, future)
result, expire = MemCacheExpire.get_cache(H["c"], uri)

View File

@@ -24,7 +24,7 @@ from .ops import *
from ..log import get_module_logger
from ..utils import parse_field, read_bin, hash_args, normalize_cache_fields
from .base import Feature
from .cache import ServerDatasetCache, ServerExpressionCache
from .cache import DiskDatasetCache, DiskExpressionCache
@six.add_metaclass(abc.ABCMeta)
@@ -357,7 +357,7 @@ class DatasetProvider(object):
whether to skip(0)/use(1)/replace(2) disk_cache
"""
return ServerDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache)
return DiskDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache)
@staticmethod
def get_instruments_d(instruments, freq):
@@ -452,7 +452,7 @@ class DatasetProvider(object):
if len(new_data) > 0:
data = pd.concat(new_data, names=["instrument"], sort=False)
data = ServerDatasetCache.cache_to_origin_data(data, column_names)
data = DiskDatasetCache.cache_to_origin_data(data, column_names)
else:
data = pd.DataFrame(columns=column_names)
@@ -915,7 +915,7 @@ class ClientDatasetProvider(DatasetProvider):
try:
# pre-mound nfs, used for demo
mnt_feature_uri = os.path.join(C.mount_path, C.dataset_cache_dir_name, feature_uri)
df = ServerDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
df = DiskDatasetCache.read_data_from_cache(mnt_feature_uri, start_time, end_time, fields)
get_module_logger("data").debug("finish slicing data")
if return_uri:
return df, feature_uri

View File

@@ -142,6 +142,7 @@ class SeriesDFilter(BaseDFilter):
the series of bool value indicating whether the date satisfies the filter condition and exists in target timestamp
"""
fstart, fend = list(filter_series.keys())[0], list(filter_series.keys())[-1]
filter_series = filter_series.astype('bool') # Make sure the filter_series is boolean
timestamp_series[fstart:fend] = timestamp_series[fstart:fend] & filter_series
return timestamp_series

View File

@@ -914,10 +914,7 @@ class IdxMax(Rolling):
if self.N == 0:
series = series.expanding(min_periods=1).apply(lambda x: x.argmax() + 1, raw=True)
else:
series = series.rolling(self.N, min_periods=1).apply(
lambda x: x.argmax() + 1,
raw=True,
)
series = series.rolling(self.N, min_periods=1).apply(lambda x: x.argmax() + 1, raw=True)
return series
@@ -965,10 +962,7 @@ class IdxMin(Rolling):
if self.N == 0:
series = series.expanding(min_periods=1).apply(lambda x: x.argmin() + 1, raw=True)
else:
series = series.rolling(self.N, min_periods=1).apply(
lambda x: x.argmin() + 1,
raw=True,
)
series = series.rolling(self.N, min_periods=1).apply(lambda x: x.argmin() + 1, raw=True)
return series
@@ -1194,11 +1188,12 @@ class Rsquare(Rolling):
super(Rsquare, self).__init__(feature, N, "rsquare")
def _load_internal(self, instrument, start_index, end_index, freq):
series = self.feature.load(instrument, start_index, end_index, freq)
_series = self.feature.load(instrument, start_index, end_index, freq)
if self.N == 0:
series = pd.Series(expanding_rsquare(series.values), index=series.index)
series = pd.Series(expanding_rsquare(_series.values), index=_series.index)
else:
series = pd.Series(rolling_rsquare(series.values, self.N), index=series.index)
series = pd.Series(rolling_rsquare(_series.values, self.N), index=_series.index)
series.loc[np.isclose(_series.rolling(self.N, min_periods=1).std(), 0, atol=2e-05)] = np.nan
return series
@@ -1341,12 +1336,7 @@ class PairRolling(ExpressionOps):
if self.N == 0:
return np.inf
return (
max(
self.feature_left.get_longest_back_rolling(),
self.feature_right.get_longest_back_rolling(),
)
+ self.N
- 1
max(self.feature_left.get_longest_back_rolling(), self.feature_right.get_longest_back_rolling()) + self.N - 1
)
def get_extended_window_size(self):
@@ -1382,6 +1372,18 @@ class Corr(PairRolling):
def __init__(self, feature_left, feature_right, N):
super(Corr, self).__init__(feature_left, feature_right, N, "corr")
def _load_internal(self, instrument, start_index, end_index, freq):
    """Load the correlation series and blank out degenerate windows.

    The correlation itself is computed by the parent class. Afterwards, every
    position where either input has a (near-)zero standard deviation over the
    window is set to NaN, because the correlation is not meaningful when one
    side is constant.

    Parameters
    ----------
    instrument, start_index, end_index, freq
        Forwarded unchanged to the parent implementation and to
        ``feature_left.load`` / ``feature_right.load``.

    Returns
    -------
    The series returned by the parent ``_load_internal`` with the degenerate
    positions replaced by ``np.nan``.
    """
    res = super(Corr, self)._load_internal(instrument, start_index, end_index, freq)

    # NOTE: Load uses MemCache, so calling load again will not cause performance degradation
    series_left = self.feature_left.load(instrument, start_index, end_index, freq)
    series_right = self.feature_right.load(instrument, start_index, end_index, freq)

    def _window_std(series):
        # N == 0 is the "expanding window" convention used by the other
        # operators in this module; pandas rejects rolling(0, min_periods=1),
        # so the expanding form must be used for that case.
        if self.N == 0:
            return series.expanding(min_periods=1).std()
        return series.rolling(self.N, min_periods=1).std()

    # np.isclose(NaN, 0) is False, so windows whose std is undefined
    # (e.g. a single observation) are left untouched.
    degenerate = np.isclose(_window_std(series_left), 0, atol=2e-05) | np.isclose(
        _window_std(series_right), 0, atol=2e-05
    )
    res.loc[degenerate] = np.nan
    return res
class Cov(PairRolling):
"""Rolling Covariance
@@ -1403,3 +1405,4 @@ class Cov(PairRolling):
def __init__(self, feature_left, feature_right, N):
super(Cov, self).__init__(feature_left, feature_right, N, "cov")