Update part of the docs

2026-07-05 03:50:57 +08:00 · 2020-11-24 23:56:16 +08:00
parent 5059bba51e
commit b4671746c2
30 changed files with 902 additions and 113 deletions
--- a/qlib/data/cache.py
+++ b/qlib/data/cache.py
@@ -748,7 +748,8 @@ class DiskDatasetCache(DatasetCache):

        The format the cache contains 3 parts(followed by typical filename).

-        - index    : cache/d41366901e25de3ec47297f12e2ba11d.index
+        - index : cache/d41366901e25de3ec47297f12e2ba11d.index
+
            - The content of the file may be in following format(pandas.Series)

                .. code-block:: python
@@ -765,7 +766,9 @@ class DiskDatasetCache(DatasetCache):
            - It indicates the `end_index` of the data for `timestamp`

        - meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta
+
        - data     : cache/d41366901e25de3ec47297f12e2ba11d
+
            - This is a hdf file sorted by datetime

        :param cache_path:  The path to store the cache
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -152,16 +152,19 @@ class InstrumentProvider(abc.ABC):
            {`market`=>base market name, `filter_pipe`=>list of filters}

            example :
-            {'market': 'csi500',
-             'filter_pipe': [{'filter_type': 'ExpressionDFilter',
-               'rule_expression': '$open<40',
-               'filter_start_time': None,
-               'filter_end_time': None,
-               'keep': False},
-              {'filter_type': 'NameDFilter',
-               'name_rule_re': 'SH[0-9]{4}55',
-               'filter_start_time': None,
-               'filter_end_time': None}]}
+
+            .. code-block::
+
+                {'market': 'csi500',
+                 'filter_pipe': [{'filter_type': 'ExpressionDFilter',
+                   'rule_expression': '$open<40',
+                   'filter_start_time': None,
+                   'filter_end_time': None,
+                   'keep': False},
+                  {'filter_type': 'NameDFilter',
+                   'name_rule_re': 'SH[0-9]{4}55',
+                   'filter_start_time': None,
+                   'filter_end_time': None}]}
        """
        if filter_pipe is None:
            filter_pipe = []
@@ -956,6 +959,8 @@ class BaseProvider:
        disk_cache=None,
    ):
        """
+        Parameters
+        ----------
        disk_cache : int
            whether to skip(0)/use(1)/replace(2) disk_cache

--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -40,12 +40,15 @@ class DataHandler(Serializable):

    Example of the data:
    The multi-index of the columns is optional.
-                             feature                                                            label
-                              $close     $volume  Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
-    datetime   instrument
-    2010-01-04 SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
-               SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
-               SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
+
+    .. code-block::
+
+                                 feature                                                            label
+                                  $close     $volume  Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
+        datetime   instrument
+        2010-01-04 SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
+                   SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
+                   SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289

    """

@@ -107,7 +110,8 @@ class DataHandler(Serializable):
        ----------
        enable_cache : bool
            default value is false
-            if `enable_cache` == True
+
+            - if `enable_cache` == True:
                the processed data will be saved on disk, and handler will load the cached data from the disk directly
                when we call `init` next time
        """
@@ -145,16 +149,21 @@ class DataHandler(Serializable):
        level : Union[str, int]
            which index level to select the data
        col_set : Union[str, List[str]]
-            if isinstance(col_set, str):
+
+            - if isinstance(col_set, str):
+
                select a set of meaningful columns.(e.g. features, columns)
-            if isinstance(col_set, List[str]):
+
+            - if isinstance(col_set, List[str]):
+
                select several sets of meaningful columns, the returned data has multiple levels
+
        squeeze : bool
            whether squeeze columns and index

        Returns
        -------
-        pd.DataFrame:
+        pd.DataFrame.
        """
        # Fetch column  first will be more friendly to SepDataFrame
        df = self._fetch_df_by_col(self._data, col_set)
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -161,7 +161,7 @@ class StaticDataLoader(DataLoader):
    DataLoader that supports loading data from file or as provided.
    """

-    def __init__(self, config: dict, join='outer'):
+    def __init__(self, config: dict, join="outer"):
        """
        Parameters
        ----------
@@ -187,8 +187,9 @@ class StaticDataLoader(DataLoader):
    def _maybe_load_raw_data(self):
        if self._data is not None:
            return
-        self._data = pd.concat({
-            fields_group: load_dataset(path_or_obj)
-            for fields_group, path_or_obj in self.config.items()
-        }, axis=1, join=self.join)
+        self._data = pd.concat(
+            {fields_group: load_dataset(path_or_obj) for fields_group, path_or_obj in self.config.items()},
+            axis=1,
+            join=self.join,
+        )
        self._data.sort_index(inplace=True)