mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-05 03:50:57 +08:00
Update part of the docs
This commit is contained in:
@@ -748,7 +748,8 @@ class DiskDatasetCache(DatasetCache):
|
||||
|
||||
The cache consists of 3 parts (each followed by a typical filename).
|
||||
|
||||
- index : cache/d41366901e25de3ec47297f12e2ba11d.index
|
||||
- index : cache/d41366901e25de3ec47297f12e2ba11d.index
|
||||
|
||||
- The content of the file may be in the following format (pandas.Series)
|
||||
|
||||
.. code-block:: python
|
||||
@@ -765,7 +766,9 @@ class DiskDatasetCache(DatasetCache):
|
||||
- It indicates the `end_index` of the data for `timestamp`
|
||||
|
||||
- meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta
|
||||
|
||||
- data : cache/d41366901e25de3ec47297f12e2ba11d
|
||||
|
||||
- This is an HDF file sorted by datetime
|
||||
|
||||
:param cache_path: The path to store the cache
|
||||
|
||||
@@ -152,16 +152,19 @@ class InstrumentProvider(abc.ABC):
|
||||
{`market`=>base market name, `filter_pipe`=>list of filters}
|
||||
|
||||
example :
|
||||
{'market': 'csi500',
|
||||
'filter_pipe': [{'filter_type': 'ExpressionDFilter',
|
||||
'rule_expression': '$open<40',
|
||||
'filter_start_time': None,
|
||||
'filter_end_time': None,
|
||||
'keep': False},
|
||||
{'filter_type': 'NameDFilter',
|
||||
'name_rule_re': 'SH[0-9]{4}55',
|
||||
'filter_start_time': None,
|
||||
'filter_end_time': None}]}
|
||||
|
||||
.. code-block::
|
||||
|
||||
{'market': 'csi500',
|
||||
'filter_pipe': [{'filter_type': 'ExpressionDFilter',
|
||||
'rule_expression': '$open<40',
|
||||
'filter_start_time': None,
|
||||
'filter_end_time': None,
|
||||
'keep': False},
|
||||
{'filter_type': 'NameDFilter',
|
||||
'name_rule_re': 'SH[0-9]{4}55',
|
||||
'filter_start_time': None,
|
||||
'filter_end_time': None}]}
|
||||
"""
|
||||
if filter_pipe is None:
|
||||
filter_pipe = []
|
||||
@@ -956,6 +959,8 @@ class BaseProvider:
|
||||
disk_cache=None,
|
||||
):
|
||||
"""
|
||||
Parameters:
|
||||
-----------
|
||||
disk_cache : int
|
||||
whether to skip(0)/use(1)/replace(2) disk_cache
|
||||
|
||||
|
||||
@@ -40,12 +40,15 @@ class DataHandler(Serializable):
|
||||
|
||||
Example of the data:
|
||||
The multi-index of the columns is optional.
|
||||
feature label
|
||||
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
|
||||
datetime instrument
|
||||
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
|
||||
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
|
||||
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
|
||||
|
||||
.. code-block::
|
||||
|
||||
feature label
|
||||
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
|
||||
datetime instrument
|
||||
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
|
||||
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
|
||||
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
|
||||
|
||||
"""
|
||||
|
||||
@@ -107,7 +110,8 @@ class DataHandler(Serializable):
|
||||
----------
|
||||
enable_cache : bool
|
||||
default value is false
|
||||
if `enable_cache` == True
|
||||
- if `enable_cache` == True:
|
||||
|
||||
the processed data will be saved on disk, and handler will load the cached data from the disk directly
|
||||
when we call `init` next time
|
||||
"""
|
||||
@@ -145,16 +149,21 @@ class DataHandler(Serializable):
|
||||
level : Union[str, int]
|
||||
which index level to select the data
|
||||
col_set : Union[str, List[str]]
|
||||
if isinstance(col_set, str):
|
||||
|
||||
- if isinstance(col_set, str):
|
||||
|
||||
select a set of meaningful columns (e.g. features, columns).
|
||||
if isinstance(col_set, List[str]):
|
||||
|
||||
- if isinstance(col_set, List[str]):
|
||||
|
||||
select several sets of meaningful columns, the returned data has multiple levels
|
||||
|
||||
squeeze : bool
|
||||
whether to squeeze columns and index
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame:
|
||||
pd.DataFrame.
|
||||
"""
|
||||
# Fetching the columns first is friendlier to SepDataFrame
|
||||
df = self._fetch_df_by_col(self._data, col_set)
|
||||
|
||||
@@ -161,7 +161,7 @@ class StaticDataLoader(DataLoader):
|
||||
DataLoader that supports loading data from file or as provided.
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict, join='outer'):
|
||||
def __init__(self, config: dict, join="outer"):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
@@ -187,8 +187,9 @@ class StaticDataLoader(DataLoader):
|
||||
def _maybe_load_raw_data(self):
|
||||
if self._data is not None:
|
||||
return
|
||||
self._data = pd.concat({
|
||||
fields_group: load_dataset(path_or_obj)
|
||||
for fields_group, path_or_obj in self.config.items()
|
||||
}, axis=1, join=self.join)
|
||||
self._data = pd.concat(
|
||||
{fields_group: load_dataset(path_or_obj) for fields_group, path_or_obj in self.config.items()},
|
||||
axis=1,
|
||||
join=self.join,
|
||||
)
|
||||
self._data.sort_index(inplace=True)
|
||||
|
||||
Reference in New Issue
Block a user