From cc01812c6259dec9c89b233a3899698c8a19acb0 Mon Sep 17 00:00:00 2001 From: YQ Tsui Date: Sun, 20 Nov 2022 14:15:59 +0800 Subject: [PATCH] Fix typos and grammar errors in docstrings and comments (#1366) * fix gramma error in doc strings * fix typos in exchange.py * fix typos and gramma errors * fix typo and rename function param to avoid shading python keyword * remove redundant parathesis; pass kwargs to parent class * fix pyblack * further correction * assign -> be assigned to --- qlib/backtest/exchange.py | 26 +++++------ qlib/contrib/data/handler.py | 45 ++++++++++--------- qlib/contrib/model/pytorch_adarnn.py | 2 +- qlib/contrib/model/pytorch_add.py | 2 +- qlib/contrib/model/pytorch_alstm.py | 2 +- qlib/contrib/model/pytorch_alstm_ts.py | 2 +- qlib/contrib/model/pytorch_gats.py | 2 +- qlib/contrib/model/pytorch_gats_ts.py | 2 +- qlib/contrib/model/pytorch_gru.py | 2 +- qlib/contrib/model/pytorch_gru_ts.py | 2 +- qlib/contrib/model/pytorch_hist.py | 2 +- qlib/contrib/model/pytorch_igmtf.py | 2 +- qlib/contrib/model/pytorch_lstm.py | 2 +- qlib/contrib/model/pytorch_lstm_ts.py | 2 +- qlib/contrib/model/pytorch_tcn.py | 2 +- qlib/contrib/model/pytorch_tcn_ts.py | 2 +- qlib/contrib/model/pytorch_tcts.py | 2 +- qlib/data/dataset/handler.py | 17 +++---- qlib/data/dataset/storage.py | 3 +- qlib/rl/utils/log.py | 6 +-- qlib/workflow/online/manager.py | 2 +- qlib/workflow/record_temp.py | 2 +- qlib/workflow/utils.py | 12 ++--- scripts/data_collector/crowd_source/README.md | 6 +-- 24 files changed, 77 insertions(+), 72 deletions(-) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index cc760be44..a2cc13623 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -27,10 +27,10 @@ from .high_performance_ds import BaseQuote, NumpyQuote class Exchange: # `quote_df` is a pd.DataFrame class that contains basic information for backtesting - # After some processing, the data will later be maintained by `quote_cls` object for faster data 
retriving. + # After some processing, the data will later be maintained by `quote_cls` object for faster data retrieving. # Some conventions for `quote_df` # - $close is for calculating the total value at end of each day. - # - if $close is None, the stock on that day is reguarded as suspended. + # - if $close is None, the stock on that day is regarded as suspended. # - $factor is for rounding to the trading unit; # - if any $factor is missing when $close exists, trading unit rounding will be disabled quote_df: pd.DataFrame @@ -141,7 +141,7 @@ class Exchange: if deal_price is None: deal_price = C.deal_price - # we have some verbose information here. So logging is enable + # we have some verbose information here. So logging is enabled self.logger = get_module_logger("online operator") # TODO: the quote, trade_dates, codes are not necessary. @@ -168,7 +168,7 @@ class Exchange: self.codes = codes # Necessary fields # $close is for calculating the total value at end of each day. - # - if $close is None, the stock on that day is reguarded as suspended. + # - if $close is None, the stock on that day is regarded as suspended. 
# $factor is for rounding to the trading unit # $change is for calculating the limit of the stock @@ -271,7 +271,7 @@ class Exchange: raise NotImplementedError(f"This type of `limit_threshold` is not supported") def _update_limit(self, limit_threshold: Union[Tuple, float, None]) -> None: - # $close is may contains NaN, the nan indicates that the stock is not tradable at that timestamp + # $close may contain NaN, the nan indicates that the stock is not tradable at that timestamp suspended = self.quote_df["$close"].isna() # check limit_threshold limit_type = self._get_limit_type(limit_threshold) @@ -356,12 +356,12 @@ class Exchange: Returns ------- - True: the trading of the stock is limted (maybe hit the highest/lowest price), hence the stock is not tradable + True: the trading of the stock is limited (maybe hit the highest/lowest price), hence the stock is not tradable False: the trading of the stock is not limited, hence the stock may be tradable """ # NOTE: # **all** is used when checking limitation. - # For example, the stock trading is limited in a day if every miniute is limited in a day if every miniute is limited. + # For example, the stock trading is limited in a day if every minute of that day is limited. 
if direction is None: # The trading limitation is related to the trading direction # if the direction is not provided, then any limitation from buy or sell will result in trading limitation @@ -385,17 +385,17 @@ class Exchange: # is suspended if stock_id in self.quote.get_all_stock(): # suspended stocks are represented by None $close stock - # The $close may contains NaN, + # The $close may contain NaN, close = self.quote.get_data(stock_id, start_time, end_time, "$close") if close is None: # if no close record exists return True elif isinstance(close, IndexData): - # **any** non-NaN $close represents trading opportunity may exists + # **any** non-NaN $close represents trading opportunity may exist # if all returned is nan, then the stock is suspended return cast(bool, cast(IndexData, close).isna().all()) else: - # it is single value, make sure is is not None + # it is a single value, make sure it is not None return np.isnan(close) else: # if the stock is not in the stock list, then it is not tradable and regarded as suspended @@ -540,8 +540,8 @@ class Exchange: direction: OrderDir = OrderDir.BUY, ) -> dict: """ - The generate the target position according to the weight and the cash. - NOTE: All the cash will assigned to the tradable stock. + Generates the target position according to the weight and the cash. + NOTE: All the cash will be assigned to the tradable stock. 
Parameter: weight_position : dict {stock_id : weight}; allocate cash by weight_position among then, weight must be in this range: 0 < weight < 1 @@ -639,7 +639,7 @@ class Exchange: random.shuffle(sorted_ids) for stock_id in sorted_ids: - # Do not generate order for the nontradable stocks + # Do not generate order for the non-tradable stocks if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time): continue diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index 07eb2da25..ca3ca5545 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -57,7 +57,7 @@ class Alpha360(DataHandlerLP): fit_end_time=None, filter_pipe=None, inst_processor=None, - **kwargs, + **kwargs ): infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) @@ -67,7 +67,7 @@ class Alpha360(DataHandlerLP): "kwargs": { "config": { "feature": self.get_feature_config(), - "label": kwargs.get("label", self.get_label_config()), + "label": kwargs.pop("label", self.get_label_config()), }, "filter_pipe": filter_pipe, "freq": freq, @@ -82,12 +82,14 @@ class Alpha360(DataHandlerLP): data_loader=data_loader, learn_processors=learn_processors, infer_processors=infer_processors, + **kwargs ) def get_label_config(self): - return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]) + return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"] - def get_feature_config(self): + @staticmethod + def get_feature_config(): # NOTE: # Alpha360 tries to provide a dataset with original price data # the original price data includes the prices and volume in the last 60 days. 
@@ -99,33 +101,33 @@ class Alpha360(DataHandlerLP): names = [] for i in range(59, 0, -1): - fields += ["Ref($close, %d)/$close" % (i)] - names += ["CLOSE%d" % (i)] + fields += ["Ref($close, %d)/$close" % i] + names += ["CLOSE%d" % i] fields += ["$close/$close"] names += ["CLOSE0"] for i in range(59, 0, -1): - fields += ["Ref($open, %d)/$close" % (i)] - names += ["OPEN%d" % (i)] + fields += ["Ref($open, %d)/$close" % i] + names += ["OPEN%d" % i] fields += ["$open/$close"] names += ["OPEN0"] for i in range(59, 0, -1): - fields += ["Ref($high, %d)/$close" % (i)] - names += ["HIGH%d" % (i)] + fields += ["Ref($high, %d)/$close" % i] + names += ["HIGH%d" % i] fields += ["$high/$close"] names += ["HIGH0"] for i in range(59, 0, -1): - fields += ["Ref($low, %d)/$close" % (i)] - names += ["LOW%d" % (i)] + fields += ["Ref($low, %d)/$close" % i] + names += ["LOW%d" % i] fields += ["$low/$close"] names += ["LOW0"] for i in range(59, 0, -1): - fields += ["Ref($vwap, %d)/$close" % (i)] - names += ["VWAP%d" % (i)] + fields += ["Ref($vwap, %d)/$close" % i] + names += ["VWAP%d" % i] fields += ["$vwap/$close"] names += ["VWAP0"] for i in range(59, 0, -1): - fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)] - names += ["VOLUME%d" % (i)] + fields += ["Ref($volume, %d)/($volume+1e-12)" % i] + names += ["VOLUME%d" % i] fields += ["$volume/($volume+1e-12)"] names += ["VOLUME0"] @@ -134,7 +136,7 @@ class Alpha360(DataHandlerLP): class Alpha360vwap(Alpha360): def get_label_config(self): - return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]) + return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"] class Alpha158(DataHandlerLP): @@ -151,7 +153,7 @@ class Alpha158(DataHandlerLP): process_type=DataHandlerLP.PTYPE_A, filter_pipe=None, inst_processor=None, - **kwargs, + **kwargs ): infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) @@ -161,7 +163,7 @@ class 
Alpha158(DataHandlerLP): "kwargs": { "config": { "feature": self.get_feature_config(), - "label": kwargs.get("label", self.get_label_config()), + "label": kwargs.pop("label", self.get_label_config()), }, "filter_pipe": filter_pipe, "freq": freq, @@ -176,6 +178,7 @@ class Alpha158(DataHandlerLP): infer_processors=infer_processors, learn_processors=learn_processors, process_type=process_type, + **kwargs ) def get_feature_config(self): @@ -190,7 +193,7 @@ class Alpha158(DataHandlerLP): return self.parse_config_to_fields(conf) def get_label_config(self): - return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]) + return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"] @staticmethod def parse_config_to_fields(config): @@ -426,4 +429,4 @@ class Alpha158(DataHandlerLP): class Alpha158vwap(Alpha158): def get_label_config(self): - return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]) + return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"] diff --git a/qlib/contrib/model/pytorch_adarnn.py b/qlib/contrib/model/pytorch_adarnn.py index 3641bd511..7570d74e0 100644 --- a/qlib/contrib/model/pytorch_adarnn.py +++ b/qlib/contrib/model/pytorch_adarnn.py @@ -28,7 +28,7 @@ class ADARNN(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_add.py b/qlib/contrib/model/pytorch_add.py index b214daed3..e929fe97f 100644 --- a/qlib/contrib/model/pytorch_add.py +++ b/qlib/contrib/model/pytorch_add.py @@ -36,7 +36,7 @@ class ADD(Model): d_feat : int input dimensions for each time step metric : str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : int diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 13e3bf879..b0770e2bd 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ 
b/qlib/contrib/model/pytorch_alstm.py @@ -30,7 +30,7 @@ class ALSTM(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : int diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 60645e2a3..3ab8ed8ab 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -33,7 +33,7 @@ class ALSTM(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : int diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index a737bddc4..127408877 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -33,7 +33,7 @@ class GATs(Model): d_feat : int input dimensions for each time step metric : str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : int diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index e8446d868..1b75efe89 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -50,7 +50,7 @@ class GATs(Model): d_feat : int input dimensions for each time step metric : str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : int diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 2275b86e1..10998236b 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -30,7 +30,7 @@ class GRU(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git 
a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index 390a66924..b588392a2 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -31,7 +31,7 @@ class GRU(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_hist.py b/qlib/contrib/model/pytorch_hist.py index 25445ba59..f7b565dc5 100644 --- a/qlib/contrib/model/pytorch_hist.py +++ b/qlib/contrib/model/pytorch_hist.py @@ -34,7 +34,7 @@ class HIST(Model): d_feat : int input dimensions for each time step metric : str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_igmtf.py b/qlib/contrib/model/pytorch_igmtf.py index e3a07c341..d38ef9ad4 100644 --- a/qlib/contrib/model/pytorch_igmtf.py +++ b/qlib/contrib/model/pytorch_igmtf.py @@ -32,7 +32,7 @@ class IGMTF(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 494fd4a0e..a68cf5eac 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -29,7 +29,7 @@ class LSTM(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index e703130fb..f1a3c55e8 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -30,7 +30,7 @@ class LSTM(Model): d_feat : int input dimension for each time step 
metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_tcn.py b/qlib/contrib/model/pytorch_tcn.py index 8c40683fe..2af7a04ea 100755 --- a/qlib/contrib/model/pytorch_tcn.py +++ b/qlib/contrib/model/pytorch_tcn.py @@ -33,7 +33,7 @@ class TCN(Model): n_chans: int number of channels metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_tcn_ts.py b/qlib/contrib/model/pytorch_tcn_ts.py index 13c125d27..4972a3065 100755 --- a/qlib/contrib/model/pytorch_tcn_ts.py +++ b/qlib/contrib/model/pytorch_tcn_ts.py @@ -30,7 +30,7 @@ class TCN(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/contrib/model/pytorch_tcts.py b/qlib/contrib/model/pytorch_tcts.py index 4f87e5f1e..b46835cb6 100644 --- a/qlib/contrib/model/pytorch_tcts.py +++ b/qlib/contrib/model/pytorch_tcts.py @@ -29,7 +29,7 @@ class TCTS(Model): d_feat : int input dimension for each time step metric: str - the evaluate metric used in early stop + the evaluation metric used in early stop optimizer : str optimizer name GPU : str diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 5d73ac6ce..7815445c1 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -137,7 +137,7 @@ class DataHandler(Serializable): # Setup data. # _data may be with multiple column index level. 
The outer level indicates the feature set name with TimeInspector.logt("Loading data"): - # make sure the fetch method is based on a index-sorted pd.DataFrame + # make sure the fetch method is based on an index-sorted pd.DataFrame self._data = lazy_sort_index(self.data_loader.load(self.instruments, self.start_time, self.end_time)) # TODO: cache @@ -167,7 +167,7 @@ class DataHandler(Serializable): - a slice range - pd.Index for specific indexes - Following conflictions may occurs + Following conflicts may occur - Does ["20200101", "20210101"] mean selecting this slice or these two days? @@ -229,7 +229,7 @@ class DataHandler(Serializable): # This method is extracted for sharing in subclasses from .storage import BaseHandlerStorage # pylint: disable=C0415 - # Following conflictions may occurs + # Following conflicts may occur # - Does [20200101", "20210101"] mean selecting this slice or these two days? # To solve this issue # - slice have higher priorities (except when level is none) @@ -313,7 +313,7 @@ class DataHandler(Serializable): self, periods: int, min_periods: Optional[int] = None, **kwargs ) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]: """ - get a iterator of sliced data with given periods + get an iterator of sliced data with given periods Args: periods (int): number of periods. @@ -412,13 +412,13 @@ class DataHandlerLP(DataHandler): process_type: str PTYPE_I = 'independent' - - self._infer will processed by infer_processors + - self._infer will be processed by infer_processors - self._learn will be processed by learn_processors PTYPE_A = 'append' - - self._infer will processed by infer_processors + - self._infer will be processed by infer_processors - self._learn will be processed by infer_processors + learn_processors @@ -671,7 +671,8 @@ class DataHandlerLP(DataHandler): def cast(cls, handler: "DataHandlerLP") -> "DataHandlerLP": """ Motivation - - A user create a datahandler in his customized package. 
Then he want to share the processed handler to other users without introduce the package dependency and complicated data processing logic. + - A user creates a datahandler in his customized package. Then he wants to share the processed handler to + other users without introducing the package dependency and complicated data processing logic. - This class make it possible by casting the class to DataHandlerLP and only keep the processed data Parameters @@ -685,7 +686,7 @@ class DataHandlerLP(DataHandler): the converted processed data """ new_hd: DataHandlerLP = object.__new__(DataHandlerLP) - new_hd.from_cast = True # add a mark for the casted instance + new_hd.from_cast = True # add a mark for the cast instance for key in list(DataHandlerLP.ATTR_MAP.values()) + [ "instruments", diff --git a/qlib/data/dataset/storage.py b/qlib/data/dataset/storage.py index a8ccdadaa..49afef912 100644 --- a/qlib/data/dataset/storage.py +++ b/qlib/data/dataset/storage.py @@ -8,7 +8,8 @@ from .utils import get_level_index, fetch_df_by_index, fetch_df_by_col class BaseHandlerStorage: - """Base data storage for datahandler + """ + Base data storage for datahandler - pd.DataFrame is the default data storage format in Qlib datahandler - If users want to use custom data storage, they should define subclass inherited BaseHandlerStorage, and implement the following method """ diff --git a/qlib/rl/utils/log.py b/qlib/rl/utils/log.py index e15bf7b54..2a113e47c 100644 --- a/qlib/rl/utils/log.py +++ b/qlib/rl/utils/log.py @@ -121,7 +121,7 @@ class LogCollector: """Log something with any type. As it's an "any" object, the only LogWriter accepting it is pickle. - Therefore pickle must be able to serialize it. + Therefore, pickle must be able to serialize it. """ if loglevel < self._min_loglevel: return @@ -243,7 +243,7 @@ class LogWriter(Generic[ObsType, ActType]): rewards A list of rewards at each step of this episode. contents - Logged contents for every steps. + Logged contents for every step. 
""" def log_step(self, reward: float, contents: Dict[str, Any]) -> None: @@ -285,7 +285,7 @@ class LogWriter(Generic[ObsType, ActType]): self.log_episode(self.episode_lengths[env_id], self.episode_rewards[env_id], self.episode_logs[env_id]) - def on_env_reset(self, env_id: int, obs: ObsType) -> None: + def on_env_reset(self, env_id: int, _: ObsType) -> None: """Callback for finite env. Reset episode statistics. Nothing task-specific is logged here because of diff --git a/qlib/workflow/online/manager.py b/qlib/workflow/online/manager.py index 9a085ace5..35e73821c 100644 --- a/qlib/workflow/online/manager.py +++ b/qlib/workflow/online/manager.py @@ -35,7 +35,7 @@ Simulation + DelayTrainer When your models don't have any temporal dependence, different time segments (based on whether or not any new model is online). ========================= =================================================================================== -Here is some pseudo code the demonstrate the workflow of each situation +Here is some pseudo code that demonstrates the workflow of each situation For simplicity - Only one strategy is used in the strategy diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 283148210..ffda529da 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -178,7 +178,7 @@ class SignalRecord(RecordTemp): # The backend handler should be DataHandler raw_label = dataset.prepare(**params) except AttributeError as e: - # The data handler is initialize with `drop_raw=True`... + # The data handler is initialized with `drop_raw=True`... # So raw_label is not available logger.warning(f"Exception: {e}") raw_label = None diff --git a/qlib/workflow/utils.py b/qlib/workflow/utils.py index 5a2f28d23..0f48c74f0 100644 --- a/qlib/workflow/utils.py +++ b/qlib/workflow/utils.py @@ -18,30 +18,30 @@ def experiment_exit_handler(): """ Method for handling the experiment when any unusual program ending occurs. 
The `atexit` handler should be put in the last, since, as long as the program ends, it will be called. - Thus, if any exception or user interuption occurs beforehead, we should handle them first. Once `R` is + Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` is ended, another call of `R.end_exp` will not take effect. Limitations: - - If pdb is used in the your program, excepthook will not be triggered when it ends. The status will be finished + - If pdb is used in your program, excepthook will not be triggered when it ends. The status will be finished """ sys.excepthook = experiment_exception_hook # handle uncaught exception atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI) # will not take effect if experiment ends -def experiment_exception_hook(type, value, tb): +def experiment_exception_hook(exc_type, value, tb): """ End an experiment with status to be "FAILED". This exception tries to catch those uncaught exception and end the experiment automatically. Parameters - type: Exception type + exc_type: Exception type value: Exception's value tb: Exception's traceback """ - logger.error(f"An exception has been raised[{type.__name__}: {value}].") + logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].") # Same as original format traceback.print_tb(tb) - print(f"{type.__name__}: {value}") + print(f"{exc_type.__name__}: {value}") R.end_exp(recorder_status=Recorder.STATUS_FA) diff --git a/scripts/data_collector/crowd_source/README.md b/scripts/data_collector/crowd_source/README.md index 14ddab154..cdf36564b 100644 --- a/scripts/data_collector/crowd_source/README.md +++ b/scripts/data_collector/crowd_source/README.md @@ -1,9 +1,9 @@ # Crowd Source Data ## Initiative -Public data source like yahoo is flawed, it might miss data for stock which is delisted and it might has data which is wrong. This can introduce survivorship bias into our training process. 
+Public data sources like Yahoo are flawed: they might miss data for stocks which are delisted, and they might have data which is wrong. This can introduce survivorship bias into our training process. -The crowd sourced data is introduced to merged data from multiple data source and cross validate against each other, so that: +The Crowd Source Data is introduced to merge data from multiple data sources and cross-validate them against each other, so that: 1. We will have a more complete history record. 2. We can identify the anomaly data and apply correction when necessary. @@ -12,7 +12,7 @@ The raw data is hosted on dolthub repo: https://www.dolthub.com/repositories/che The processing script and sql is hosted on github repo: https://github.com/chenditc/investment_data -The pakcaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data +The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data ## How to use it in qlib ### Option 1: Download release bin data