Fix the Errors with unexpected indentation when building Qlib's documentation (#1352)

* Fix ERROR: Unexpected indentation in qlib/data/dataset/handler.py
* Fix ERROR: Unexpected indentation in qlib/data/dataset/__init__.py
* Fix ERROR: Unexpected indentation in ../qlib/data/cache.py
* Fix ERROR: Unexpected indentation in qlib/model/meta/task.py
* Fix ERROR: Unexpected indentation in qlib/model/meta/dataset.py
* Fix ERROR: Unexpected indentation in qlib/workflow/online/manager.py
* Fix ERROR: Unexpected indentation in qlib/workflow/online/update.py
* Fix ERROR: Unexpected indentation in /qlib/workflow/__init__.py
* Fix ERROR: Unexpected indentation in qlib/data/base.py
* Fix ERROR: Unexpected indentation in qlib/data/dataset/loader.py
* Fix ERROR: Unexpected indentation in qlib/contrib/evaluate.py
* Fix ERROR: Unexpected indentation in qlib/workflow/record_temp.py
* Fix ERROR: Unexpected indentation in qlib/workflow/task/gen.py
* Fix ERROR: Unexpected indentation in qlib/strategy/base.py
* Fix qlib/data/dataset/handler.py
* Retest
2026-07-21 11:17:34 +08:00 · 2022-11-15 03:49:36 +03:00
parent 8802653bb9
commit b51e881be3
14 changed files with 80 additions and 23 deletions
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -187,9 +187,13 @@ def backtest_daily(
        the benchmark for reporting.
    account : Union[float, int, Position]
        information for describing how to create the account
+
        For `float` or `int`:
+
            Using Account with only initial cash
+
        For `Position`:
+
            Using Account with a Position
    exchange_kwargs : dict
        the kwargs for initializing Exchange
@@ -283,8 +287,8 @@ def long_short_backtest(
                       NOTE: This will be faster with offline qlib.
    :return:            The result of backtest, it is represented by a dict.
                        { "long": long_returns(excess),
-                          "short": short_returns(excess),
-                          "long_short": long_short_returns}
+                        "short": short_returns(excess),
+                        "long_short": long_short_returns}
    """
    if get_level_index(pred, level="datetime") == 1:
        pred = pred.swaplevel().sort_index()
--- a/qlib/data/base.py
+++ b/qlib/data/base.py
@@ -16,8 +16,10 @@ class Expression(abc.ABC):

    Expression is designed to handle the calculation of data with the format below
    data with two dimension for each instrument,
+
    - feature
    - time:  it  could be observation time or period time.
+
        - period time is designed for Point-in-time database.  For example, the period time may be 2014Q4; its value can be observed multiple times (different values may be observed at different times due to amendments).
    """

@@ -142,9 +144,12 @@ class Expression(abc.ABC):
        This function is responsible for loading feature/expression based on the expression engine.

        The concrete implementation will be separated into two parts:
+
        1) caching data, handle errors.
+
            - This part is shared by all the expressions and implemented in Expression
        2) processing and calculating data based on the specific expression.
+
            - This part is different in each expression and implemented in each expression

        Expression Engine is shared by different data.
--- a/qlib/data/cache.py
+++ b/qlib/data/cache.py
@@ -394,7 +394,7 @@ class DatasetCache(BaseProviderCache):

        .. note:: The server use redis_lock to make sure
            read-write conflicts will not be triggered
-                but client readers are not considered.
+            but client readers are not considered.
        """
        if disk_cache == 0:
            # skip cache
--- a/qlib/data/dataset/__init__.py
+++ b/qlib/data/dataset/__init__.py
@@ -205,8 +205,9 @@ class DatasetH(Dataset):
        col_set : str
            The col_set will be passed to self.handler when fetching data.
            TODO: make it automatic:
-                - select DK_I for test data
-                - select DK_L for training data.
+
+            - select DK_I for test data
+            - select DK_L for training data.
        data_key : str
            The data to fetch:  DK_*
            Default is DK_I, which indicate fetching data for **inference**.
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -160,13 +160,17 @@ class DataHandler(Serializable):
        selector : Union[pd.Timestamp, slice, str]
            describe how to select data by index
            It can be categorized as follows
+
            - fetch single index
            - fetch a range of index
+
                - a slice range
                - pd.Index for specific indexes

            The following conflicts may occur
-            - Does [20200101", "20210101"] mean selecting this slice or these two days?
+
+            - Does ["20200101", "20210101"] mean selecting this slice or these two days?
+
                - slice have higher priorities

        level : Union[str, int]
@@ -178,7 +182,8 @@ class DataHandler(Serializable):

                select a set of meaningful, pd.Index columns.(e.g. features, columns)

-                if col_set == CS_RAW:
+                - if col_set == CS_RAW:
+
                    the raw dataset will be returned.

            - if isinstance(col_set, List[str]):
@@ -186,8 +191,10 @@ class DataHandler(Serializable):
                select several sets of meaningful columns, the returned data has multiple levels

        proc_func: Callable
+
            - Give a hook for processing data before fetching
            - An example to explain the necessity of the hook:
+
                - A Dataset learned some processors to process data which is related to data segmentation
                - It will apply them every time when preparing data.
                - The learned processor require the dataframe remains the same format when fitting and applying
@@ -326,18 +333,23 @@ class DataHandlerLP(DataHandler):
    DataHandler with **(L)earnable (P)rocessor**

    This handler will produce three pieces of data in pd.DataFrame format.
+
    - DK_R / self._data: the raw data loaded from the loader
    - DK_I / self._infer: the data processed for inference
    - DK_L / self._learn: the data processed for learning model.

    The motivation of using different processor workflows for learning and inference
    Here are some examples.
+
    - The instrument universe for learning and inference may be different.
    - The processing of some samples may rely on label (for example, some samples that hit the limit may need extra processing or be dropped).
-        These processors only apply to the learning phase.
+
+        - These processors only apply to the learning phase.

    Tips to improve the performance of data handler
+
    - To reduce the memory cost
+
        - `drop_raw=True`: this will modify the data inplace on raw data;
    """

@@ -482,12 +494,18 @@ class DataHandlerLP(DataHandler):
        Notation: (data)  [processor]

        # data processing flow of self.process_type == DataHandlerLP.PTYPE_I
-        (self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)
-                                               \
-                                                -[infer_processors]-(_infer_df)
+
+        .. code-block:: text
+
+            (self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)
+                                                   \\
+                                                    -[infer_processors]-(_infer_df)

        # data processing flow of self.process_type == DataHandlerLP.PTYPE_A
-        (self._data)-[shared_processors]-(_shared_df)-[infer_processors]-(_infer_df)-[learn_processors]-(_learn_df)
+
+        .. code-block:: text
+
+            (self._data)-[shared_processors]-(_shared_df)-[infer_processors]-(_infer_df)-[learn_processors]-(_learn_df)

        Parameters
        ----------
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -278,7 +278,9 @@ class DataLoaderDH(DataLoader):
    - If you just want to load data from single datahandler, you can write them in single data handler

    TODO: What makes this module not that easy to use.
+
    - For online scenario
+
        - The underlayer data handler should be configured. But data loader doesn't provide such interface & hook.
    """

--- a/qlib/model/meta/dataset.py
+++ b/qlib/model/meta/dataset.py
@@ -12,11 +12,15 @@ class MetaTaskDataset(Serializable, metaclass=abc.ABCMeta):
    A dataset fetching the data in a meta-level.

    A Meta Dataset is responsible for
+
    - input tasks(e.g. Qlib tasks) and prepare meta tasks
+
        - meta task contains more information than normal tasks (e.g. input data for meta model)

    The learnt pattern could transfer to other meta dataset. The following cases should be supported
+
    - A meta-model trained on meta-dataset A and then applied to meta-dataset B
+
        - Some patterns are shared between meta-dataset A and B, so meta-input on meta-dataset A is used when the meta model is applied on meta-dataset B
    """

--- a/qlib/model/meta/task.py
+++ b/qlib/model/meta/task.py
@@ -11,9 +11,11 @@ class MetaTask:
    It serves as a component as in MetaDatasetDS

    The data processing is different
+
    - the processed input may be different between training and testing
+
        - When training, the X, y, X_test, y_test in training tasks are necessary (# PROC_MODE_FULL #)
-                                                but not necessary in test tasks. (# PROC_MODE_TEST #)
+          but not necessary in test tasks. (# PROC_MODE_TEST #)
        - When the meta model can be transferred into other dataset, only meta_info is necessary  (# PROC_MODE_TRANSFER #)
    """

@@ -24,6 +26,7 @@ class MetaTask:
    def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL):
        """
        The `__init__` func is responsible for
+
        - store the task
        - store the origin input data for
        - process the input data for meta data
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -36,6 +36,7 @@ class BaseStrategy:
        outer_trade_decision : BaseTradeDecision, optional
            the trade decision of outer strategy which this strategy relies, and it will be traded in
            [start_time, end_time], by default None
+
            - If the strategy is used to split trade decision, it will be used
            - If the strategy is used for portfolio management, it can be ignored
        level_infra : LevelInfrastructure, optional
@@ -45,11 +46,13 @@ class BaseStrategy:

        trade_exchange : Exchange
            exchange that provides market info, used to deal order and generate report
+
            - If `trade_exchange` is None, self.trade_exchange will be set with common_infra
            - It allows different trade_exchanges is used in different executions.
            - For example:
+
                - In daily execution, both daily exchange and minutely are usable, but the daily exchange is
-                    recommended because it run faster.
+                  recommended because it runs faster.
                - In minutely execution, the daily exchange is not usable, only the minutely exchange is recommended.
        """

@@ -137,6 +140,7 @@ class BaseStrategy:
        ----------
        execute_result : List[object], optional
            the executed result for trade decision, by default None
+
            - When call the generate_trade_decision firstly, `execute_result` could be None
        """
        raise NotImplementedError("generate_trade_decision is not implemented!")
--- a/qlib/workflow/__init__.py
+++ b/qlib/workflow/__init__.py
@@ -350,6 +350,7 @@ class QlibRecorder:
        Method to reset the current uri of current experiment manager.

        NOTE:
+
        - When the uri refers to a file path, please use the absolute path instead of strings like "~/mlruns/".
          The backend doesn't support strings like this.
        """
--- a/qlib/workflow/online/manager.py
+++ b/qlib/workflow/online/manager.py
@@ -78,7 +78,9 @@ For simplicity


 # Can we simplify current workflow?
+
 - Can reduce the number of state of tasks?
+
    - For each task, we have three phases (i.e. task, partly trained task, final trained task)
 """

--- a/qlib/workflow/online/update.py
+++ b/qlib/workflow/online/update.py
@@ -82,19 +82,23 @@ class RecordUpdater(metaclass=ABCMeta):
 class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
    """
    Dataset-Based Updater
+
    - Providing updating feature for Updating data based on Qlib Dataset

    Assumption
-    - Based on Qlib dataset
-    - The data to be updated is a multi-level index pd.DataFrame. For example label , prediction.

-                                 LABEL0
-        datetime   instrument
-        2021-05-10 SH600000    0.006965
-                   SH600004    0.003407
-        ...                         ...
-        2021-05-28 SZ300498    0.015748
-                   SZ300676   -0.001321
+    - Based on Qlib dataset
+    - The data to be updated is a multi-level index pd.DataFrame. For example label, prediction.
+
+        .. code-block::
+
+                                     LABEL0
+            datetime   instrument
+            2021-05-10 SH600000    0.006965
+                       SH600004    0.003407
+            ...                         ...
+            2021-05-28 SZ300498    0.015748
+                       SZ300676   -0.001321
    """

    def __init__(
@@ -111,6 +115,7 @@ class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
        Init PredUpdater.

        Expected behavior in following cases:
+
        - if `to_date` is greater than the max date in the calendar, the data will be updated to the latest date
        - if there are data before `from_date` or after `to_date`, only the data between `from_date` and `to_date` are affected.

@@ -118,11 +123,15 @@ class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
            record : Recorder
            to_date :
                update the prediction to the `to_date`
+
                if to_date is None:
+
                    data will be updated to the latest date.
            from_date :
                the update will start from `from_date`
+
                if from_date is None:
+
                    the updating will occur on the next tick after the latest data in historical data
            hist_ref : int
                Sometimes, the dataset will have historical depends.
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -349,7 +349,9 @@ class PortAnaRecord(ACRecordTemp):
    This is the Portfolio Analysis Record class that generates the analysis results such as those of backtest. This class inherits the ``RecordTemp`` class.

    The following files will be stored in recorder
+
    - report_normal.pkl & positions_normal.pkl:
+
        - The return report and detailed positions of the backtest, returned by `qlib/contrib/evaluate.py:backtest`
    - port_analysis.pkl : The risk analysis of your portfolio, returned by `qlib/contrib/evaluate.py:risk_analysis`
    """
--- a/qlib/workflow/task/gen.py
+++ b/qlib/workflow/task/gen.py
@@ -94,7 +94,9 @@ def handler_mod(task: dict, rolling_gen):
    """
    Help to modify the handler end time when using RollingGen
    It tries to handle the following case
+
    - Handler's data end_time is earlier than dataset's test_data's segments.
+
        - To handle this, handler's data's end_time is extended.

    If the handler's end_time is None, then it is not necessary to change its end time.