Fix typos and grammar errors in docstrings and comments (#1366)

* fix grammar errors in docstrings * fix typos in exchange.py * fix typos and grammar errors * fix typo and rename function param to avoid shadowing a Python builtin * remove redundant parentheses; pass kwargs to parent class * fix pyblack * further correction * assign -> be assigned to
2026-07-21 19:27:36 +08:00 · 2022-11-20 14:15:59 +08:00
parent 0c4db8b0f8
commit cc01812c62
24 changed files with 77 additions and 72 deletions
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -27,10 +27,10 @@ from .high_performance_ds import BaseQuote, NumpyQuote
 class Exchange:
    # `quote_df` is a pd.DataFrame class that contains basic information for backtesting
-    # After some processing, the data will later be maintained by `quote_cls` object for faster data retriving.
+    # After some processing, the data will later be maintained by `quote_cls` object for faster data retrieving.
    # Some conventions for `quote_df`
    # - $close is for calculating the total value at end of each day.
-    #   - if $close is None, the stock on that day is reguarded as suspended.
+    #   - if $close is None, the stock on that day is regarded as suspended.
    # - $factor is for rounding to the trading unit;
    #   - if any $factor is missing when $close exists, trading unit rounding will be disabled
    quote_df: pd.DataFrame
@@ -141,7 +141,7 @@ class Exchange:
        if deal_price is None:
            deal_price = C.deal_price
-        # we have some verbose information here. So logging is enable
+        # we have some verbose information here. So logging is enabled
        self.logger = get_module_logger("online operator")
        # TODO: the quote, trade_dates, codes are not necessary.
@@ -168,7 +168,7 @@ class Exchange:
        self.codes = codes
        # Necessary fields
        # $close is for calculating the total value at end of each day.
-        # - if $close is None, the stock on that day is reguarded as suspended.
+        # - if $close is None, the stock on that day is regarded as suspended.
        # $factor is for rounding to the trading unit
        # $change is for calculating the limit of the stock
@@ -271,7 +271,7 @@ class Exchange:
            raise NotImplementedError(f"This type of `limit_threshold` is not supported")
    def _update_limit(self, limit_threshold: Union[Tuple, float, None]) -> None:
-        # $close is may contains NaN, the nan indicates that the stock is not tradable at that timestamp
+        # $close may contain NaN; a NaN indicates that the stock is not tradable at that timestamp
        suspended = self.quote_df["$close"].isna()
        # check limit_threshold
        limit_type = self._get_limit_type(limit_threshold)
@@ -356,12 +356,12 @@ class Exchange:
        Returns
        -------
-        True: the trading of the stock is limted (maybe hit the highest/lowest price), hence the stock is not tradable
+        True: the trading of the stock is limited (maybe hit the highest/lowest price), hence the stock is not tradable
        False: the trading of the stock is not limited, hence the stock may be tradable
        """
        # NOTE:
        # **all** is used when checking limitation.
-        # For example, the stock trading is limited in a day if every miniute is limited in a day if every miniute is limited.
+        # For example, the stock trading is limited in a day if every minute is limited.
        if direction is None:
            # The trading limitation is related to the trading direction
            # if the direction is not provided, then any limitation from buy or sell will result in trading limitation
@@ -385,17 +385,17 @@ class Exchange:
        # is suspended
        if stock_id in self.quote.get_all_stock():
            # suspended stocks are represented by None $close stock
-            # The $close may contains NaN,
+            # The $close may contain NaN,
            close = self.quote.get_data(stock_id, start_time, end_time, "$close")
            if close is None:
                # if no close record exists
                return True
            elif isinstance(close, IndexData):
-                # **any** non-NaN $close represents trading opportunity may exists
+                # **any** non-NaN $close indicates that a trading opportunity may exist
                #  if all returned is nan, then the stock is suspended
                return cast(bool, cast(IndexData, close).isna().all())
            else:
-                # it is single value, make sure is is not None
+                # it is a single value, make sure it is not None
                return np.isnan(close)
        else:
            # if the stock is not in the stock list, then it is not tradable and regarded as suspended
@@ -540,8 +540,8 @@ class Exchange:
        direction: OrderDir = OrderDir.BUY,
    ) -> dict:
        """
-        The generate the target position according to the weight and the cash.
+        Generates the target position according to the weight and the cash.
-        NOTE: All the cash will assigned to the tradable stock.
+        NOTE: All the cash will be assigned to the tradable stock.
        Parameter:
        weight_position : dict {stock_id : weight}; allocate cash by weight_position
            among then, weight must be in this range: 0 < weight < 1
@@ -639,7 +639,7 @@ class Exchange:
        random.shuffle(sorted_ids)
        for stock_id in sorted_ids:
-            # Do not generate order for the nontradable stocks
+            # Do not generate order for the non-tradable stocks
            if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
                continue
--- a/qlib/contrib/data/handler.py
+++ b/qlib/contrib/data/handler.py
@@ -57,7 +57,7 @@ class Alpha360(DataHandlerLP):
        fit_end_time=None,
        filter_pipe=None,
        inst_processor=None,
-        **kwargs,
+        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -67,7 +67,7 @@ class Alpha360(DataHandlerLP):
            "kwargs": {
                "config": {
                    "feature": self.get_feature_config(),
-                    "label": kwargs.get("label", self.get_label_config()),
+                    "label": kwargs.pop("label", self.get_label_config()),
                },
                "filter_pipe": filter_pipe,
                "freq": freq,
@@ -82,12 +82,14 @@ class Alpha360(DataHandlerLP):
            data_loader=data_loader,
            learn_processors=learn_processors,
            infer_processors=infer_processors,
            **kwargs
        )
    def get_label_config(self):
-        return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
+        return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
-    def get_feature_config(self):
+    @staticmethod
    def get_feature_config():
        # NOTE:
        # Alpha360 tries to provide a dataset with original price data
        # the original price data includes the prices and volume in the last 60 days.
@@ -99,33 +101,33 @@ class Alpha360(DataHandlerLP):
        names = []
        for i in range(59, 0, -1):
-            fields += ["Ref($close, %d)/$close" % (i)]
+            fields += ["Ref($close, %d)/$close" % i]
-            names += ["CLOSE%d" % (i)]
+            names += ["CLOSE%d" % i]
        fields += ["$close/$close"]
        names += ["CLOSE0"]
        for i in range(59, 0, -1):
-            fields += ["Ref($open, %d)/$close" % (i)]
+            fields += ["Ref($open, %d)/$close" % i]
-            names += ["OPEN%d" % (i)]
+            names += ["OPEN%d" % i]
        fields += ["$open/$close"]
        names += ["OPEN0"]
        for i in range(59, 0, -1):
-            fields += ["Ref($high, %d)/$close" % (i)]
+            fields += ["Ref($high, %d)/$close" % i]
-            names += ["HIGH%d" % (i)]
+            names += ["HIGH%d" % i]
        fields += ["$high/$close"]
        names += ["HIGH0"]
        for i in range(59, 0, -1):
-            fields += ["Ref($low, %d)/$close" % (i)]
+            fields += ["Ref($low, %d)/$close" % i]
-            names += ["LOW%d" % (i)]
+            names += ["LOW%d" % i]
        fields += ["$low/$close"]
        names += ["LOW0"]
        for i in range(59, 0, -1):
-            fields += ["Ref($vwap, %d)/$close" % (i)]
+            fields += ["Ref($vwap, %d)/$close" % i]
-            names += ["VWAP%d" % (i)]
+            names += ["VWAP%d" % i]
        fields += ["$vwap/$close"]
        names += ["VWAP0"]
        for i in range(59, 0, -1):
-            fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)]
+            fields += ["Ref($volume, %d)/($volume+1e-12)" % i]
-            names += ["VOLUME%d" % (i)]
+            names += ["VOLUME%d" % i]
        fields += ["$volume/($volume+1e-12)"]
        names += ["VOLUME0"]
@@ -134,7 +136,7 @@ class Alpha360(DataHandlerLP):
 class Alpha360vwap(Alpha360):
    def get_label_config(self):
-        return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"])
+        return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]
 class Alpha158(DataHandlerLP):
@@ -151,7 +153,7 @@ class Alpha158(DataHandlerLP):
        process_type=DataHandlerLP.PTYPE_A,
        filter_pipe=None,
        inst_processor=None,
-        **kwargs,
+        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -161,7 +163,7 @@ class Alpha158(DataHandlerLP):
            "kwargs": {
                "config": {
                    "feature": self.get_feature_config(),
-                    "label": kwargs.get("label", self.get_label_config()),
+                    "label": kwargs.pop("label", self.get_label_config()),
                },
                "filter_pipe": filter_pipe,
                "freq": freq,
@@ -176,6 +178,7 @@ class Alpha158(DataHandlerLP):
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            process_type=process_type,
            **kwargs
        )
    def get_feature_config(self):
@@ -190,7 +193,7 @@ class Alpha158(DataHandlerLP):
        return self.parse_config_to_fields(conf)
    def get_label_config(self):
-        return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
+        return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
    @staticmethod
    def parse_config_to_fields(config):
@@ -426,4 +429,4 @@ class Alpha158(DataHandlerLP):
 class Alpha158vwap(Alpha158):
    def get_label_config(self):
-        return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"])
+        return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]
--- a/qlib/contrib/model/pytorch_adarnn.py
+++ b/qlib/contrib/model/pytorch_adarnn.py
@@ -28,7 +28,7 @@ class ADARNN(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_add.py
+++ b/qlib/contrib/model/pytorch_add.py
@@ -36,7 +36,7 @@ class ADD(Model):
     d_feat : int
         input dimensions for each time step
     metric : str
-         the evaluate metric used in early stop
+         the evaluation metric used in early stop
     optimizer : str
         optimizer name
     GPU : int
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -30,7 +30,7 @@ class ALSTM(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : int
--- a/qlib/contrib/model/pytorch_alstm_ts.py
+++ b/qlib/contrib/model/pytorch_alstm_ts.py
@@ -33,7 +33,7 @@ class ALSTM(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : int
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -33,7 +33,7 @@ class GATs(Model):
    d_feat : int
        input dimensions for each time step
    metric : str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : int
--- a/qlib/contrib/model/pytorch_gats_ts.py
+++ b/qlib/contrib/model/pytorch_gats_ts.py
@@ -50,7 +50,7 @@ class GATs(Model):
    d_feat : int
        input dimensions for each time step
    metric : str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : int
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -30,7 +30,7 @@ class GRU(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_gru_ts.py
+++ b/qlib/contrib/model/pytorch_gru_ts.py
@@ -31,7 +31,7 @@ class GRU(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_hist.py
+++ b/qlib/contrib/model/pytorch_hist.py
@@ -34,7 +34,7 @@ class HIST(Model):
    d_feat : int
        input dimensions for each time step
    metric : str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_igmtf.py
+++ b/qlib/contrib/model/pytorch_igmtf.py
@@ -32,7 +32,7 @@ class IGMTF(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -29,7 +29,7 @@ class LSTM(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_lstm_ts.py
+++ b/qlib/contrib/model/pytorch_lstm_ts.py
@@ -30,7 +30,7 @@ class LSTM(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_tcn.py
+++ b/qlib/contrib/model/pytorch_tcn.py
@@ -33,7 +33,7 @@ class TCN(Model):
    n_chans: int
        number of channels
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_tcn_ts.py
+++ b/qlib/contrib/model/pytorch_tcn_ts.py
@@ -30,7 +30,7 @@ class TCN(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/contrib/model/pytorch_tcts.py
+++ b/qlib/contrib/model/pytorch_tcts.py
@@ -29,7 +29,7 @@ class TCTS(Model):
    d_feat : int
        input dimension for each time step
    metric: str
-        the evaluate metric used in early stop
+        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -137,7 +137,7 @@ class DataHandler(Serializable):
        # Setup data.
        # _data may be with multiple column index level. The outer level indicates the feature set name
        with TimeInspector.logt("Loading data"):
-            # make sure the fetch method is based on a index-sorted pd.DataFrame
+            # make sure the fetch method is based on an index-sorted pd.DataFrame
            self._data = lazy_sort_index(self.data_loader.load(self.instruments, self.start_time, self.end_time))
        # TODO: cache
@@ -167,7 +167,7 @@ class DataHandler(Serializable):
                - a slice range
                - pd.Index for specific indexes
-            Following conflictions may occurs
+            Following conflicts may occur
            - Does ["20200101", "20210101"] mean selecting this slice or these two days?
@@ -229,7 +229,7 @@ class DataHandler(Serializable):
        # This method is extracted for sharing in subclasses
        from .storage import BaseHandlerStorage  # pylint: disable=C0415
-        # Following conflictions may occurs
+        # Following conflicts may occur
        # - Does [20200101", "20210101"] mean selecting this slice or these two days?
        # To solve this issue
        #   - slice have higher priorities (except when level is none)
@@ -313,7 +313,7 @@ class DataHandler(Serializable):
        self, periods: int, min_periods: Optional[int] = None, **kwargs
    ) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
        """
-        get a iterator of sliced data with given periods
+        get an iterator of sliced data with given periods
        Args:
            periods (int): number of periods.
@@ -412,13 +412,13 @@ class DataHandlerLP(DataHandler):
        process_type: str
            PTYPE_I = 'independent'
-            - self._infer will processed by infer_processors
+            - self._infer will be processed by infer_processors
            - self._learn will be processed by learn_processors
            PTYPE_A = 'append'
-            - self._infer will processed by infer_processors
+            - self._infer will be processed by infer_processors
            - self._learn will be processed by infer_processors + learn_processors
@@ -671,7 +671,8 @@ class DataHandlerLP(DataHandler):
    def cast(cls, handler: "DataHandlerLP") -> "DataHandlerLP":
        """
        Motivation
-        - A user create a datahandler in his customized package. Then he want to share the processed handler to other users without introduce the package dependency and complicated data processing logic.
+        - A user creates a datahandler in his customized package. Then he wants to share the processed handler with
          other users without introducing the package dependency and complicated data processing logic.
        - This class make it possible by casting the class to DataHandlerLP and only keep the processed data
        Parameters
@@ -685,7 +686,7 @@ class DataHandlerLP(DataHandler):
            the converted processed data
        """
        new_hd: DataHandlerLP = object.__new__(DataHandlerLP)
-        new_hd.from_cast = True  # add a mark for the casted instance
+        new_hd.from_cast = True  # add a mark for the cast instance
        for key in list(DataHandlerLP.ATTR_MAP.values()) + [
            "instruments",
--- a/qlib/data/dataset/storage.py
+++ b/qlib/data/dataset/storage.py
@@ -8,7 +8,8 @@ from .utils import get_level_index, fetch_df_by_index, fetch_df_by_col
 class BaseHandlerStorage:
-    """Base data storage for datahandler
+    """
    Base data storage for datahandler
    - pd.DataFrame is the default data storage format in Qlib datahandler
    - If users want to use custom data storage, they should define subclass inherited BaseHandlerStorage, and implement the following method
    """
--- a/qlib/rl/utils/log.py
+++ b/qlib/rl/utils/log.py
@@ -121,7 +121,7 @@ class LogCollector:
        """Log something with any type.
        As it's an "any" object, the only LogWriter accepting it is pickle.
-        Therefore pickle must be able to serialize it.
+        Therefore, pickle must be able to serialize it.
        """
        if loglevel < self._min_loglevel:
            return
@@ -243,7 +243,7 @@ class LogWriter(Generic[ObsType, ActType]):
        rewards
            A list of rewards at each step of this episode.
        contents
-            Logged contents for every steps.
+            Logged contents for every step.
        """
    def log_step(self, reward: float, contents: Dict[str, Any]) -> None:
@@ -285,7 +285,7 @@ class LogWriter(Generic[ObsType, ActType]):
            self.log_episode(self.episode_lengths[env_id], self.episode_rewards[env_id], self.episode_logs[env_id])
-    def on_env_reset(self, env_id: int, obs: ObsType) -> None:
+    def on_env_reset(self, env_id: int, _: ObsType) -> None:
        """Callback for finite env.
        Reset episode statistics. Nothing task-specific is logged here because of
--- a/qlib/workflow/online/manager.py
+++ b/qlib/workflow/online/manager.py
@@ -35,7 +35,7 @@ Simulation + DelayTrainer  When your models don't have any temporal dependence,
                           different time segments (based on whether or not any new model is online).
 =========================  ===================================================================================
-Here is some pseudo code the demonstrate the workflow of each situation
+Here is some pseudo code that demonstrates the workflow of each situation
 For simplicity
    - Only one strategy is used in the strategy
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -178,7 +178,7 @@ class SignalRecord(RecordTemp):
                # The backend handler should be DataHandler
                raw_label = dataset.prepare(**params)
            except AttributeError as e:
-                # The data handler is initialize with `drop_raw=True`...
+                # The data handler is initialized with `drop_raw=True`...
                # So raw_label is not available
                logger.warning(f"Exception: {e}")
                raw_label = None
--- a/qlib/workflow/utils.py
+++ b/qlib/workflow/utils.py
@@ -18,30 +18,30 @@ def experiment_exit_handler():
    """
    Method for handling the experiment when any unusual program ending occurs.
    The `atexit` handler should be put in the last, since, as long as the program ends, it will be called.
-    Thus, if any exception or user interuption occurs beforehead, we should handle them first. Once `R` is
+    Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` is
    ended, another call of `R.end_exp` will not take effect.
    Limitations:
-    - If pdb is used in the your program, excepthook will not be triggered when it ends.  The status will be finished
+    - If pdb is used in your program, excepthook will not be triggered when it ends.  The status will be finished
    """
    sys.excepthook = experiment_exception_hook  # handle uncaught exception
    atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI)  # will not take effect if experiment ends
-def experiment_exception_hook(type, value, tb):
+def experiment_exception_hook(exc_type, value, tb):
    """
    End an experiment with status to be "FAILED". This exception tries to catch those uncaught exception
    and end the experiment automatically.
    Parameters
-    type: Exception type
+    exc_type: Exception type
    value: Exception's value
    tb: Exception's traceback
    """
-    logger.error(f"An exception has been raised[{type.__name__}: {value}].")
+    logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].")
    # Same as original format
    traceback.print_tb(tb)
-    print(f"{type.__name__}: {value}")
+    print(f"{exc_type.__name__}: {value}")
    R.end_exp(recorder_status=Recorder.STATUS_FA)
--- a/scripts/data_collector/crowd_source/README.md
+++ b/scripts/data_collector/crowd_source/README.md
@@ -1,9 +1,9 @@
 # Crowd Source Data
 ## Initiative
-Public data source like yahoo is flawed, it might miss data for stock which is delisted and it might has data which is wrong. This can introduce survivorship bias into our training process.
+Public data sources like Yahoo are flawed: they might miss data for stocks which are delisted, and they might have data which is wrong. This can introduce survivorship bias into our training process.
-The crowd sourced data is introduced to merged data from multiple data source and cross validate against each other, so that:
+The Crowd Source Data is introduced to merge data from multiple data sources and cross-validate them against each other, so that:
 1. We will have a more complete history record.
 2. We can identify the anomaly data and apply correction when necessary.
@@ -12,7 +12,7 @@ The raw data is hosted on dolthub repo: https://www.dolthub.com/repositories/che
 The processing script and sql is hosted on github repo: https://github.com/chenditc/investment_data
-The pakcaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data
+The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data
 ## How to use it in qlib
 ### Option 1: Download release bin data