1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Fix typos and grammar errors in docstrings and comments (#1366)

* fix grammar errors in docstrings

* fix typos in exchange.py

* fix typos and grammar errors

* fix typo and rename function param to avoid shadowing a Python builtin

* remove redundant parentheses; pass kwargs to parent class

* fix pyblack

* further correction

* assign -> be assigned to
This commit is contained in:
YQ Tsui
2022-11-20 14:15:59 +08:00
committed by GitHub
parent 0c4db8b0f8
commit cc01812c62
24 changed files with 77 additions and 72 deletions

View File

@@ -27,10 +27,10 @@ from .high_performance_ds import BaseQuote, NumpyQuote
class Exchange: class Exchange:
# `quote_df` is a pd.DataFrame class that contains basic information for backtesting # `quote_df` is a pd.DataFrame class that contains basic information for backtesting
# After some processing, the data will later be maintained by `quote_cls` object for faster data retriving. # After some processing, the data will later be maintained by `quote_cls` object for faster data retrieving.
# Some conventions for `quote_df` # Some conventions for `quote_df`
# - $close is for calculating the total value at end of each day. # - $close is for calculating the total value at end of each day.
# - if $close is None, the stock on that day is reguarded as suspended. # - if $close is None, the stock on that day is regarded as suspended.
# - $factor is for rounding to the trading unit; # - $factor is for rounding to the trading unit;
# - if any $factor is missing when $close exists, trading unit rounding will be disabled # - if any $factor is missing when $close exists, trading unit rounding will be disabled
quote_df: pd.DataFrame quote_df: pd.DataFrame
@@ -141,7 +141,7 @@ class Exchange:
if deal_price is None: if deal_price is None:
deal_price = C.deal_price deal_price = C.deal_price
# we have some verbose information here. So logging is enable # we have some verbose information here. So logging is enabled
self.logger = get_module_logger("online operator") self.logger = get_module_logger("online operator")
# TODO: the quote, trade_dates, codes are not necessary. # TODO: the quote, trade_dates, codes are not necessary.
@@ -168,7 +168,7 @@ class Exchange:
self.codes = codes self.codes = codes
# Necessary fields # Necessary fields
# $close is for calculating the total value at end of each day. # $close is for calculating the total value at end of each day.
# - if $close is None, the stock on that day is reguarded as suspended. # - if $close is None, the stock on that day is regarded as suspended.
# $factor is for rounding to the trading unit # $factor is for rounding to the trading unit
# $change is for calculating the limit of the stock # $change is for calculating the limit of the stock
@@ -271,7 +271,7 @@ class Exchange:
raise NotImplementedError(f"This type of `limit_threshold` is not supported") raise NotImplementedError(f"This type of `limit_threshold` is not supported")
def _update_limit(self, limit_threshold: Union[Tuple, float, None]) -> None: def _update_limit(self, limit_threshold: Union[Tuple, float, None]) -> None:
# $close is may contains NaN, the nan indicates that the stock is not tradable at that timestamp # $close may contain NaN, the nan indicates that the stock is not tradable at that timestamp
suspended = self.quote_df["$close"].isna() suspended = self.quote_df["$close"].isna()
# check limit_threshold # check limit_threshold
limit_type = self._get_limit_type(limit_threshold) limit_type = self._get_limit_type(limit_threshold)
@@ -356,12 +356,12 @@ class Exchange:
Returns Returns
------- -------
True: the trading of the stock is limted (maybe hit the highest/lowest price), hence the stock is not tradable True: the trading of the stock is limited (maybe hit the highest/lowest price), hence the stock is not tradable
False: the trading of the stock is not limited, hence the stock may be tradable False: the trading of the stock is not limited, hence the stock may be tradable
""" """
# NOTE: # NOTE:
# **all** is used when checking limitation. # **all** is used when checking limitation.
# For example, the stock trading is limited in a day if every miniute is limited in a day if every miniute is limited. # For example, the stock trading is limited in a day if every minute is limited in a day if every minute is limited.
if direction is None: if direction is None:
# The trading limitation is related to the trading direction # The trading limitation is related to the trading direction
# if the direction is not provided, then any limitation from buy or sell will result in trading limitation # if the direction is not provided, then any limitation from buy or sell will result in trading limitation
@@ -385,17 +385,17 @@ class Exchange:
# is suspended # is suspended
if stock_id in self.quote.get_all_stock(): if stock_id in self.quote.get_all_stock():
# suspended stocks are represented by None $close stock # suspended stocks are represented by None $close stock
# The $close may contains NaN, # The $close may contain NaN,
close = self.quote.get_data(stock_id, start_time, end_time, "$close") close = self.quote.get_data(stock_id, start_time, end_time, "$close")
if close is None: if close is None:
# if no close record exists # if no close record exists
return True return True
elif isinstance(close, IndexData): elif isinstance(close, IndexData):
# **any** non-NaN $close represents trading opportunity may exists # **any** non-NaN $close represents trading opportunity may exist
# if all returned is nan, then the stock is suspended # if all returned is nan, then the stock is suspended
return cast(bool, cast(IndexData, close).isna().all()) return cast(bool, cast(IndexData, close).isna().all())
else: else:
# it is single value, make sure is is not None # it is single value, make sure is not None
return np.isnan(close) return np.isnan(close)
else: else:
# if the stock is not in the stock list, then it is not tradable and regarded as suspended # if the stock is not in the stock list, then it is not tradable and regarded as suspended
@@ -540,8 +540,8 @@ class Exchange:
direction: OrderDir = OrderDir.BUY, direction: OrderDir = OrderDir.BUY,
) -> dict: ) -> dict:
""" """
The generate the target position according to the weight and the cash. Generates the target position according to the weight and the cash.
NOTE: All the cash will assigned to the tradable stock. NOTE: All the cash will be assigned to the tradable stock.
Parameter: Parameter:
weight_position : dict {stock_id : weight}; allocate cash by weight_position weight_position : dict {stock_id : weight}; allocate cash by weight_position
among then, weight must be in this range: 0 < weight < 1 among then, weight must be in this range: 0 < weight < 1
@@ -639,7 +639,7 @@ class Exchange:
random.shuffle(sorted_ids) random.shuffle(sorted_ids)
for stock_id in sorted_ids: for stock_id in sorted_ids:
# Do not generate order for the nontradable stocks # Do not generate order for the non-tradable stocks
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time): if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
continue continue

View File

@@ -57,7 +57,7 @@ class Alpha360(DataHandlerLP):
fit_end_time=None, fit_end_time=None,
filter_pipe=None, filter_pipe=None,
inst_processor=None, inst_processor=None,
**kwargs, **kwargs
): ):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -67,7 +67,7 @@ class Alpha360(DataHandlerLP):
"kwargs": { "kwargs": {
"config": { "config": {
"feature": self.get_feature_config(), "feature": self.get_feature_config(),
"label": kwargs.get("label", self.get_label_config()), "label": kwargs.pop("label", self.get_label_config()),
}, },
"filter_pipe": filter_pipe, "filter_pipe": filter_pipe,
"freq": freq, "freq": freq,
@@ -82,12 +82,14 @@ class Alpha360(DataHandlerLP):
data_loader=data_loader, data_loader=data_loader,
learn_processors=learn_processors, learn_processors=learn_processors,
infer_processors=infer_processors, infer_processors=infer_processors,
**kwargs
) )
def get_label_config(self): def get_label_config(self):
return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]) return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
def get_feature_config(self): @staticmethod
def get_feature_config():
# NOTE: # NOTE:
# Alpha360 tries to provide a dataset with original price data # Alpha360 tries to provide a dataset with original price data
# the original price data includes the prices and volume in the last 60 days. # the original price data includes the prices and volume in the last 60 days.
@@ -99,33 +101,33 @@ class Alpha360(DataHandlerLP):
names = [] names = []
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($close, %d)/$close" % (i)] fields += ["Ref($close, %d)/$close" % i]
names += ["CLOSE%d" % (i)] names += ["CLOSE%d" % i]
fields += ["$close/$close"] fields += ["$close/$close"]
names += ["CLOSE0"] names += ["CLOSE0"]
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($open, %d)/$close" % (i)] fields += ["Ref($open, %d)/$close" % i]
names += ["OPEN%d" % (i)] names += ["OPEN%d" % i]
fields += ["$open/$close"] fields += ["$open/$close"]
names += ["OPEN0"] names += ["OPEN0"]
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($high, %d)/$close" % (i)] fields += ["Ref($high, %d)/$close" % i]
names += ["HIGH%d" % (i)] names += ["HIGH%d" % i]
fields += ["$high/$close"] fields += ["$high/$close"]
names += ["HIGH0"] names += ["HIGH0"]
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($low, %d)/$close" % (i)] fields += ["Ref($low, %d)/$close" % i]
names += ["LOW%d" % (i)] names += ["LOW%d" % i]
fields += ["$low/$close"] fields += ["$low/$close"]
names += ["LOW0"] names += ["LOW0"]
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($vwap, %d)/$close" % (i)] fields += ["Ref($vwap, %d)/$close" % i]
names += ["VWAP%d" % (i)] names += ["VWAP%d" % i]
fields += ["$vwap/$close"] fields += ["$vwap/$close"]
names += ["VWAP0"] names += ["VWAP0"]
for i in range(59, 0, -1): for i in range(59, 0, -1):
fields += ["Ref($volume, %d)/($volume+1e-12)" % (i)] fields += ["Ref($volume, %d)/($volume+1e-12)" % i]
names += ["VOLUME%d" % (i)] names += ["VOLUME%d" % i]
fields += ["$volume/($volume+1e-12)"] fields += ["$volume/($volume+1e-12)"]
names += ["VOLUME0"] names += ["VOLUME0"]
@@ -134,7 +136,7 @@ class Alpha360(DataHandlerLP):
class Alpha360vwap(Alpha360): class Alpha360vwap(Alpha360):
def get_label_config(self): def get_label_config(self):
return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]) return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]
class Alpha158(DataHandlerLP): class Alpha158(DataHandlerLP):
@@ -151,7 +153,7 @@ class Alpha158(DataHandlerLP):
process_type=DataHandlerLP.PTYPE_A, process_type=DataHandlerLP.PTYPE_A,
filter_pipe=None, filter_pipe=None,
inst_processor=None, inst_processor=None,
**kwargs, **kwargs
): ):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@@ -161,7 +163,7 @@ class Alpha158(DataHandlerLP):
"kwargs": { "kwargs": {
"config": { "config": {
"feature": self.get_feature_config(), "feature": self.get_feature_config(),
"label": kwargs.get("label", self.get_label_config()), "label": kwargs.pop("label", self.get_label_config()),
}, },
"filter_pipe": filter_pipe, "filter_pipe": filter_pipe,
"freq": freq, "freq": freq,
@@ -176,6 +178,7 @@ class Alpha158(DataHandlerLP):
infer_processors=infer_processors, infer_processors=infer_processors,
learn_processors=learn_processors, learn_processors=learn_processors,
process_type=process_type, process_type=process_type,
**kwargs
) )
def get_feature_config(self): def get_feature_config(self):
@@ -190,7 +193,7 @@ class Alpha158(DataHandlerLP):
return self.parse_config_to_fields(conf) return self.parse_config_to_fields(conf)
def get_label_config(self): def get_label_config(self):
return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]) return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]
@staticmethod @staticmethod
def parse_config_to_fields(config): def parse_config_to_fields(config):
@@ -426,4 +429,4 @@ class Alpha158(DataHandlerLP):
class Alpha158vwap(Alpha158): class Alpha158vwap(Alpha158):
def get_label_config(self): def get_label_config(self):
return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]) return ["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"]

View File

@@ -28,7 +28,7 @@ class ADARNN(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -36,7 +36,7 @@ class ADD(Model):
d_feat : int d_feat : int
input dimensions for each time step input dimensions for each time step
metric : str metric : str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : int GPU : int

View File

@@ -30,7 +30,7 @@ class ALSTM(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : int GPU : int

View File

@@ -33,7 +33,7 @@ class ALSTM(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : int GPU : int

View File

@@ -33,7 +33,7 @@ class GATs(Model):
d_feat : int d_feat : int
input dimensions for each time step input dimensions for each time step
metric : str metric : str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : int GPU : int

View File

@@ -50,7 +50,7 @@ class GATs(Model):
d_feat : int d_feat : int
input dimensions for each time step input dimensions for each time step
metric : str metric : str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : int GPU : int

View File

@@ -30,7 +30,7 @@ class GRU(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -31,7 +31,7 @@ class GRU(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -34,7 +34,7 @@ class HIST(Model):
d_feat : int d_feat : int
input dimensions for each time step input dimensions for each time step
metric : str metric : str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -32,7 +32,7 @@ class IGMTF(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -29,7 +29,7 @@ class LSTM(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -30,7 +30,7 @@ class LSTM(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -33,7 +33,7 @@ class TCN(Model):
n_chans: int n_chans: int
number of channels number of channels
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -30,7 +30,7 @@ class TCN(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -29,7 +29,7 @@ class TCTS(Model):
d_feat : int d_feat : int
input dimension for each time step input dimension for each time step
metric: str metric: str
the evaluate metric used in early stop the evaluation metric used in early stop
optimizer : str optimizer : str
optimizer name optimizer name
GPU : str GPU : str

View File

@@ -137,7 +137,7 @@ class DataHandler(Serializable):
# Setup data. # Setup data.
# _data may be with multiple column index level. The outer level indicates the feature set name # _data may be with multiple column index level. The outer level indicates the feature set name
with TimeInspector.logt("Loading data"): with TimeInspector.logt("Loading data"):
# make sure the fetch method is based on a index-sorted pd.DataFrame # make sure the fetch method is based on an index-sorted pd.DataFrame
self._data = lazy_sort_index(self.data_loader.load(self.instruments, self.start_time, self.end_time)) self._data = lazy_sort_index(self.data_loader.load(self.instruments, self.start_time, self.end_time))
# TODO: cache # TODO: cache
@@ -167,7 +167,7 @@ class DataHandler(Serializable):
- a slice range - a slice range
- pd.Index for specific indexes - pd.Index for specific indexes
Following conflictions may occurs Following conflicts may occur
- Does ["20200101", "20210101"] mean selecting this slice or these two days? - Does ["20200101", "20210101"] mean selecting this slice or these two days?
@@ -229,7 +229,7 @@ class DataHandler(Serializable):
# This method is extracted for sharing in subclasses # This method is extracted for sharing in subclasses
from .storage import BaseHandlerStorage # pylint: disable=C0415 from .storage import BaseHandlerStorage # pylint: disable=C0415
# Following conflictions may occurs # Following conflicts may occur
# - Does [20200101", "20210101"] mean selecting this slice or these two days? # - Does [20200101", "20210101"] mean selecting this slice or these two days?
# To solve this issue # To solve this issue
# - slice have higher priorities (except when level is none) # - slice have higher priorities (except when level is none)
@@ -313,7 +313,7 @@ class DataHandler(Serializable):
self, periods: int, min_periods: Optional[int] = None, **kwargs self, periods: int, min_periods: Optional[int] = None, **kwargs
) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]: ) -> Iterator[Tuple[pd.Timestamp, pd.DataFrame]]:
""" """
get a iterator of sliced data with given periods get an iterator of sliced data with given periods
Args: Args:
periods (int): number of periods. periods (int): number of periods.
@@ -412,13 +412,13 @@ class DataHandlerLP(DataHandler):
process_type: str process_type: str
PTYPE_I = 'independent' PTYPE_I = 'independent'
- self._infer will processed by infer_processors - self._infer will be processed by infer_processors
- self._learn will be processed by learn_processors - self._learn will be processed by learn_processors
PTYPE_A = 'append' PTYPE_A = 'append'
- self._infer will processed by infer_processors - self._infer will be processed by infer_processors
- self._learn will be processed by infer_processors + learn_processors - self._learn will be processed by infer_processors + learn_processors
@@ -671,7 +671,8 @@ class DataHandlerLP(DataHandler):
def cast(cls, handler: "DataHandlerLP") -> "DataHandlerLP": def cast(cls, handler: "DataHandlerLP") -> "DataHandlerLP":
""" """
Motivation Motivation
- A user create a datahandler in his customized package. Then he want to share the processed handler to other users without introduce the package dependency and complicated data processing logic. - A user creates a datahandler in his customized package. Then he wants to share the processed handler to
other users without introduce the package dependency and complicated data processing logic.
- This class make it possible by casting the class to DataHandlerLP and only keep the processed data - This class make it possible by casting the class to DataHandlerLP and only keep the processed data
Parameters Parameters
@@ -685,7 +686,7 @@ class DataHandlerLP(DataHandler):
the converted processed data the converted processed data
""" """
new_hd: DataHandlerLP = object.__new__(DataHandlerLP) new_hd: DataHandlerLP = object.__new__(DataHandlerLP)
new_hd.from_cast = True # add a mark for the casted instance new_hd.from_cast = True # add a mark for the cast instance
for key in list(DataHandlerLP.ATTR_MAP.values()) + [ for key in list(DataHandlerLP.ATTR_MAP.values()) + [
"instruments", "instruments",

View File

@@ -8,7 +8,8 @@ from .utils import get_level_index, fetch_df_by_index, fetch_df_by_col
class BaseHandlerStorage: class BaseHandlerStorage:
"""Base data storage for datahandler """
Base data storage for datahandler
- pd.DataFrame is the default data storage format in Qlib datahandler - pd.DataFrame is the default data storage format in Qlib datahandler
- If users want to use custom data storage, they should define subclass inherited BaseHandlerStorage, and implement the following method - If users want to use custom data storage, they should define subclass inherited BaseHandlerStorage, and implement the following method
""" """

View File

@@ -121,7 +121,7 @@ class LogCollector:
"""Log something with any type. """Log something with any type.
As it's an "any" object, the only LogWriter accepting it is pickle. As it's an "any" object, the only LogWriter accepting it is pickle.
Therefore pickle must be able to serialize it. Therefore, pickle must be able to serialize it.
""" """
if loglevel < self._min_loglevel: if loglevel < self._min_loglevel:
return return
@@ -243,7 +243,7 @@ class LogWriter(Generic[ObsType, ActType]):
rewards rewards
A list of rewards at each step of this episode. A list of rewards at each step of this episode.
contents contents
Logged contents for every steps. Logged contents for every step.
""" """
def log_step(self, reward: float, contents: Dict[str, Any]) -> None: def log_step(self, reward: float, contents: Dict[str, Any]) -> None:
@@ -285,7 +285,7 @@ class LogWriter(Generic[ObsType, ActType]):
self.log_episode(self.episode_lengths[env_id], self.episode_rewards[env_id], self.episode_logs[env_id]) self.log_episode(self.episode_lengths[env_id], self.episode_rewards[env_id], self.episode_logs[env_id])
def on_env_reset(self, env_id: int, obs: ObsType) -> None: def on_env_reset(self, env_id: int, _: ObsType) -> None:
"""Callback for finite env. """Callback for finite env.
Reset episode statistics. Nothing task-specific is logged here because of Reset episode statistics. Nothing task-specific is logged here because of

View File

@@ -35,7 +35,7 @@ Simulation + DelayTrainer When your models don't have any temporal dependence,
different time segments (based on whether or not any new model is online). different time segments (based on whether or not any new model is online).
========================= =================================================================================== ========================= ===================================================================================
Here is some pseudo code the demonstrate the workflow of each situation Here is some pseudo code that demonstrate the workflow of each situation
For simplicity For simplicity
- Only one strategy is used in the strategy - Only one strategy is used in the strategy

View File

@@ -178,7 +178,7 @@ class SignalRecord(RecordTemp):
# The backend handler should be DataHandler # The backend handler should be DataHandler
raw_label = dataset.prepare(**params) raw_label = dataset.prepare(**params)
except AttributeError as e: except AttributeError as e:
# The data handler is initialize with `drop_raw=True`... # The data handler is initialized with `drop_raw=True`...
# So raw_label is not available # So raw_label is not available
logger.warning(f"Exception: {e}") logger.warning(f"Exception: {e}")
raw_label = None raw_label = None

View File

@@ -18,30 +18,30 @@ def experiment_exit_handler():
""" """
Method for handling the experiment when any unusual program ending occurs. Method for handling the experiment when any unusual program ending occurs.
The `atexit` handler should be put in the last, since, as long as the program ends, it will be called. The `atexit` handler should be put in the last, since, as long as the program ends, it will be called.
Thus, if any exception or user interuption occurs beforehead, we should handle them first. Once `R` is Thus, if any exception or user interruption occurs beforehand, we should handle them first. Once `R` is
ended, another call of `R.end_exp` will not take effect. ended, another call of `R.end_exp` will not take effect.
Limitations: Limitations:
- If pdb is used in the your program, excepthook will not be triggered when it ends. The status will be finished - If pdb is used in your program, excepthook will not be triggered when it ends. The status will be finished
""" """
sys.excepthook = experiment_exception_hook # handle uncaught exception sys.excepthook = experiment_exception_hook # handle uncaught exception
atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI) # will not take effect if experiment ends atexit.register(R.end_exp, recorder_status=Recorder.STATUS_FI) # will not take effect if experiment ends
def experiment_exception_hook(type, value, tb): def experiment_exception_hook(exc_type, value, tb):
""" """
End an experiment with status to be "FAILED". This exception tries to catch those uncaught exception End an experiment with status to be "FAILED". This exception tries to catch those uncaught exception
and end the experiment automatically. and end the experiment automatically.
Parameters Parameters
type: Exception type exc_type: Exception type
value: Exception's value value: Exception's value
tb: Exception's traceback tb: Exception's traceback
""" """
logger.error(f"An exception has been raised[{type.__name__}: {value}].") logger.error(f"An exception has been raised[{exc_type.__name__}: {value}].")
# Same as original format # Same as original format
traceback.print_tb(tb) traceback.print_tb(tb)
print(f"{type.__name__}: {value}") print(f"{exc_type.__name__}: {value}")
R.end_exp(recorder_status=Recorder.STATUS_FA) R.end_exp(recorder_status=Recorder.STATUS_FA)

View File

@@ -1,9 +1,9 @@
# Crowd Source Data # Crowd Source Data
## Initiative ## Initiative
Public data source like yahoo is flawed, it might miss data for stock which is delisted and it might has data which is wrong. This can introduce survivorship bias into our training process. Public data source like yahoo is flawed, it might miss data for stock which is delisted and it might have data which is wrong. This can introduce survivorship bias into our training process.
The crowd sourced data is introduced to merged data from multiple data source and cross validate against each other, so that: The Crowd Source Data is introduced to merged data from multiple data source and cross validate against each other, so that:
1. We will have a more complete history record. 1. We will have a more complete history record.
2. We can identify the anomaly data and apply correction when necessary. 2. We can identify the anomaly data and apply correction when necessary.
@@ -12,7 +12,7 @@ The raw data is hosted on dolthub repo: https://www.dolthub.com/repositories/che
The processing script and sql is hosted on github repo: https://github.com/chenditc/investment_data The processing script and sql is hosted on github repo: https://github.com/chenditc/investment_data
The pakcaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data The packaged docker runtime is hosted on dockerhub: https://hub.docker.com/repository/docker/chenditc/investment_data
## How to use it in qlib ## How to use it in qlib
### Option 1: Download release bin data ### Option 1: Download release bin data