1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-05 03:50:57 +08:00

Update part of the docs

This commit is contained in:
Jactus
2020-11-24 23:56:16 +08:00
parent 5059bba51e
commit b4671746c2
30 changed files with 902 additions and 113 deletions

View File

@@ -748,7 +748,8 @@ class DiskDatasetCache(DatasetCache):
The format of the cache contains 3 parts (each followed by a typical filename).
- index : cache/d41366901e25de3ec47297f12e2ba11d.index
- index : cache/d41366901e25de3ec47297f12e2ba11d.index
- The content of the file may be in following format(pandas.Series)
.. code-block:: python
@@ -765,7 +766,9 @@ class DiskDatasetCache(DatasetCache):
- It indicates the `end_index` of the data for `timestamp`
- meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta
- data : cache/d41366901e25de3ec47297f12e2ba11d
- This is an HDF file sorted by datetime
:param cache_path: The path to store the cache

View File

@@ -152,16 +152,19 @@ class InstrumentProvider(abc.ABC):
{`market`=>base market name, `filter_pipe`=>list of filters}
example :
{'market': 'csi500',
'filter_pipe': [{'filter_type': 'ExpressionDFilter',
'rule_expression': '$open<40',
'filter_start_time': None,
'filter_end_time': None,
'keep': False},
{'filter_type': 'NameDFilter',
'name_rule_re': 'SH[0-9]{4}55',
'filter_start_time': None,
'filter_end_time': None}]}
.. code-block::
{'market': 'csi500',
'filter_pipe': [{'filter_type': 'ExpressionDFilter',
'rule_expression': '$open<40',
'filter_start_time': None,
'filter_end_time': None,
'keep': False},
{'filter_type': 'NameDFilter',
'name_rule_re': 'SH[0-9]{4}55',
'filter_start_time': None,
'filter_end_time': None}]}
"""
if filter_pipe is None:
filter_pipe = []
@@ -956,6 +959,8 @@ class BaseProvider:
disk_cache=None,
):
"""
Parameters:
-----------
disk_cache : int
whether to skip(0)/use(1)/replace(2) disk_cache

View File

@@ -40,12 +40,15 @@ class DataHandler(Serializable):
Example of the data:
The multi-index of the columns is optional.
feature label
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
datetime instrument
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
.. code-block::
feature label
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
datetime instrument
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
"""
@@ -107,7 +110,8 @@ class DataHandler(Serializable):
----------
enable_cache : bool
default value is False
if `enable_cache` == True
- if `enable_cache` == True:
the processed data will be saved on disk, and handler will load the cached data from the disk directly
when we call `init` next time
"""
@@ -145,16 +149,21 @@ class DataHandler(Serializable):
level : Union[str, int]
which index level to select the data
col_set : Union[str, List[str]]
if isinstance(col_set, str):
- if isinstance(col_set, str):
select a set of meaningful columns (e.g. features, columns).
if isinstance(col_set, List[str]):
- if isinstance(col_set, List[str]):
select several sets of meaningful columns, the returned data has multiple levels
squeeze : bool
whether to squeeze columns and index
Returns
-------
pd.DataFrame:
pd.DataFrame.
"""
# Fetch column first will be more friendly to SepDataFrame
df = self._fetch_df_by_col(self._data, col_set)

View File

@@ -161,7 +161,7 @@ class StaticDataLoader(DataLoader):
DataLoader that supports loading data from file or as provided.
"""
def __init__(self, config: dict, join='outer'):
def __init__(self, config: dict, join="outer"):
"""
Parameters
----------
@@ -187,8 +187,9 @@ class StaticDataLoader(DataLoader):
def _maybe_load_raw_data(self):
if self._data is not None:
return
self._data = pd.concat({
fields_group: load_dataset(path_or_obj)
for fields_group, path_or_obj in self.config.items()
}, axis=1, join=self.join)
self._data = pd.concat(
{fields_group: load_dataset(path_or_obj) for fields_group, path_or_obj in self.config.items()},
axis=1,
join=self.join,
)
self._data.sort_index(inplace=True)