Update docstrings and fix DNN benchmark input_dim

2026-07-21 11:17:34 +08:00 · 2020-11-25 18:03:52 +08:00
parent 5ac90f25b3
commit b31480a06a
6 changed files with 94 additions and 64 deletions
--- a/examples/benchmarks/DNN/workflow_config_dnn.yaml
+++ b/examples/benchmarks/DNN/workflow_config_dnn.yaml
@@ -30,7 +30,7 @@ task:
        module_path: qlib.contrib.model.pytorch_nn
        kwargs:
            loss: mse
-            input_dim: 360
+            input_dim: 158
            output_dim: 1
            lr: 0.002
            lr_decay: 0.96
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -190,7 +190,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    Parameters
    ----------

-    # backtest workflow related or commmon arguments
+    - **backtest workflow related or common arguments**
+
    pred : pandas.DataFrame
        predict should has <datetime, instrument> index and one `score` column
    account : float
@@ -202,7 +203,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    verbose : bool
        whether to print log

-    # strategy related arguments
+    - **strategy related arguments**
+
    strategy : Strategy()
        strategy used in backtest
    topk : int (Default value: 50)
@@ -225,7 +227,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    str_type: 'amount', 'weight' or 'dropout'
        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy

-    # exchange related arguments
+    - **exchange related arguments**
+    
    exchange: Exchange()
        pass the exchange for speeding up.
    subscribe_fields: list
--- a/qlib/contrib/strategy/strategy.py
+++ b/qlib/contrib/strategy/strategy.py
@@ -26,7 +26,9 @@ class BaseStrategy:

    def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
        """
-        Parameters:
+        DO NOT directly change the state of current
+
+        Parameters
        -----------
        score_series : pd.Seires
            stock_id , score
@@ -39,14 +41,13 @@ class BaseStrategy:
            predict date
        trade_date : pd.Timestamp
            trade date
-
-        DO NOT directly change the state of current
        """
        pass

    def update(self, score_series, pred_date, trade_date):
        """User can use this method to update strategy state each trade date.
-        Parameters:
+
+        Parameters
        -----------
        score_series : pd.Series
            stock_id , score
@@ -98,8 +99,9 @@ class AdjustTimer:
    """AdjustTimer
    Responsible for timing of position adjusting

-    This is designed as multiple inheritance mechanism due to
+    This is designed as multiple inheritance mechanism due to:
    - the is_adjust may need access to the internel state of a strategy
+
    - it can be reguard as a enhancement to the existing strategy
    """

@@ -140,21 +142,24 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):

    def generate_target_weight_position(self, score, current, trade_date):
        """
-        Parameters:
+        Generate target position from score for this date and the current position. The cash is not considered in the position.
+
+        Parameters
        -----------
-        score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
-        current : current position, use Position() class
+        score : pd.Series
+            pred score for this trade date, index is stock_id, contain 'score' column
+        current : Position()
+            current position
        trade_exchange : Exchange()
-        trade_date : trade date
-        generate target position from score for this date and the current position
-        The cash is not considered in the position
+        trade_date : pd.Timestamp
+            trade date
        """
        raise NotImplementedError()

    def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
        """
-        Parameters:
-        ----------
+        Parameters
+        -----------
        score_series : pd.Seires
            stock_id , score
        current : Position()
@@ -188,7 +193,7 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):
 class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
    def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs):
        """
-        Parameters:
+        Parameters
        -----------
        topk : int
            The number of stocks in the portfolio
@@ -229,7 +234,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        """
        Gnererate order list according to score_series at trade_date, will not change current.

-        Parameters:
+        Parameters
        -----------
        score_series : pd.Series
            stock_id , score
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -14,9 +14,11 @@ class Dataset(Serializable):

    def __init__(self, *args, **kwargs):
        """
-        init is designed to finish following steps
+        init is designed to finish following steps:
+
        - setup data
            - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
+        
        - initialize the state of the dataset(info to prepare the data)
            - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.

@@ -29,11 +31,15 @@ class Dataset(Serializable):
        """
        setup the data

-        We split the setup_data function for following situation
-        - 1) User have a Dataset object with learned status on disk
-        - 2) User load the Dataset object from the disk(Note the init function is skiped)
-        - 3) User call `setup_data` to load new data
-        - 4) User prepare data for model based on previous status
+        We split the setup_data function for following situation:
+
+        - User have a Dataset object with learned status on disk
+
+        - User load the Dataset object from the disk (Note the init function is skipped)
+
+        - User call `setup_data` to load new data
+
+        - User prepare data for model based on previous status
        """
        pass

@@ -41,8 +47,9 @@ class Dataset(Serializable):
        """
        The type of dataset depends on the model. (It could be pd.DataFrame, pytorch.DataLoader, etc.)
        The parameters should specify the scope for the prepared data
-        The method sould
+        The method should:
        - process the data
+
        - return the processed data

        Returns
@@ -55,11 +62,12 @@ class Dataset(Serializable):

 class DatasetH(Dataset):
    """
-    Dataset with Data(H)anler
+    Dataset with Data(H)andler

    User should try to put the data preprocessing functions into handler.
-    Only following data processing functions should be placed in Dataset
+    Only following data processing functions should be placed in Dataset:
    - The processing is related to specific model.
+
    - The processing is related to data split
    """

@@ -81,21 +89,26 @@ class DatasetH(Dataset):
        Parameters
        ----------
        handler : Union[dict, DataHandler]
-            handler could be
-            1) insntance of `DataHandler`
-            2) config of `DataHandler`.  Please refer to `DataHandler`
+            handler could be:
+
+            - instance of `DataHandler`
+
+            - config of `DataHandler`.  Please refer to `DataHandler`
        segments : list
            Describe the options to segment the data.
-            Here are some examples
-            1) 'segments': {
-                    'train': ("2008-01-01", "2014-12-31"),
-                    'valid': ("2017-01-01", "2020-08-01",),
-                    'test': ("2015-01-01", "2016-12-31",),
-                }
-            2) 'segments': {
-                    'insample': ("2008-01-01", "2014-12-31"),
-                    'outsample': ("2017-01-01", "2020-08-01",),
-                }
+            Here are some examples:
+
+            .. code-block::
+            
+                1) 'segments': {
+                        'train': ("2008-01-01", "2014-12-31"),
+                        'valid': ("2017-01-01", "2020-08-01",),
+                        'test': ("2015-01-01", "2016-12-31",),
+                    }
+                2) 'segments': {
+                        'insample': ("2008-01-01", "2014-12-31"),
+                        'outsample': ("2017-01-01", "2020-08-01",),
+                    }
        """
        self._handler = init_instance_by_config(handler, accept_types=DataHandler)
        self._segments = segments.copy()
@@ -114,9 +127,11 @@ class DatasetH(Dataset):
        ----------
        segments : Union[List[str], Tuple[str], str, slice]
            Describe the scope of the data to be prepared
-            Here are some examples
-            1) 'train'
-            2) ['train', 'valid']
+            Here are some examples:
+
+            - 'train'
+
+            - ['train', 'valid']
        col_set : str
            The col_set will be passed to self._handler when fetching data
        data_key: str
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -41,7 +41,7 @@ class DataHandler(Serializable):
    Example of the data:
    The multi-index of the columns is optional.

-    .. code-block::
+    .. code-block:: text

                                feature                                                            label
                                $close     $volume  Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
@@ -109,7 +109,8 @@ class DataHandler(Serializable):
        Parameters
        ----------
        enable_cache : bool
-            default value is false
+            default value is false:
+
            - if `enable_cache` == True:

                the processed data will be saved on disk, and handler will load the cached data from the disk directly
@@ -378,8 +379,10 @@ class DataHandlerLP(DataHandler):
        init_type : str
            The type `IT_*` listed above
        enable_cache : bool
-            default value is false
-            if `enable_cache` == True:
+            default value is false:
+
+            - if `enable_cache` == True:
+
                the processed data will be saved on disk, and handler will load the cached data from the disk directly
                when we call `init` next time
        """
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -39,14 +39,16 @@ class DataLoader(abc.ABC):
        pd.DataFrame:
            data load from the under layer source

-            Example of the data:
-            (The multi-index of the columns is optional.)
-                                    feature                                                             label
-                                    $close     $volume     Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
-            datetime    instrument
-            2010-01-04  SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
-                        SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
-                        SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
+            Example of the data (The multi-index of the columns is optional.):
+
+            .. code-block::
+
+                                        feature                                                             label
+                                        $close     $volume     Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
+                datetime    instrument
+                2010-01-04  SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
+                            SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
+                            SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
        """
        pass

@@ -55,7 +57,7 @@ class DLWParser(DataLoader):
    """
    (D)ata(L)oader (W)ith (P)arser for features and names

-    Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields
+    Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields.
    """

    def __init__(self, config: Tuple[list, tuple, dict]):
@@ -65,14 +67,16 @@ class DLWParser(DataLoader):
        config : Tuple[list, tuple, dict]
            Config will be used to describe the fields and column names

-            <config> := {
-                "group_name1": <fields_info1>
-                "group_name2": <fields_info2>
-            }
-            or
-            <config> := <fields_info>
+            .. code-block:: YAML

-            <fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
+                <config> := {
+                    "group_name1": <fields_info1>
+                    "group_name2": <fields_info2>
+                }
+                or
+                <config> := <fields_info>
+
+                <fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
        """
        self.is_group = isinstance(config, dict)