diff --git a/examples/benchmarks/DNN/workflow_config_dnn.yaml b/examples/benchmarks/DNN/workflow_config_dnn.yaml index e853726ca..0f9ae7254 100644 --- a/examples/benchmarks/DNN/workflow_config_dnn.yaml +++ b/examples/benchmarks/DNN/workflow_config_dnn.yaml @@ -30,7 +30,7 @@ task: module_path: qlib.contrib.model.pytorch_nn kwargs: loss: mse - input_dim: 360 + input_dim: 158 output_dim: 1 lr: 0.002 lr_decay: 0.96 diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index a9b08719a..cf1793c93 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -190,7 +190,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k Parameters ---------- - # backtest workflow related or commmon arguments + - **backtest workflow related or common arguments** + pred : pandas.DataFrame predict should has index and one `score` column account : float @@ -202,7 +203,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k verbose : bool whether to print log - # strategy related arguments + - **strategy related arguments** + strategy : Strategy() strategy used in backtest topk : int (Default value: 50) @@ -225,7 +227,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k str_type: 'amount', 'weight' or 'dropout' strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy - # exchange related arguments + - **exchange related arguments** + exchange: Exchange() pass the exchange for speeding up.
subscribe_fields: list diff --git a/qlib/contrib/strategy/strategy.py b/qlib/contrib/strategy/strategy.py index 6eac9bafe..0e6a4ae2d 100644 --- a/qlib/contrib/strategy/strategy.py +++ b/qlib/contrib/strategy/strategy.py @@ -26,7 +26,9 @@ class BaseStrategy: def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date): """ - Parameters: + DO NOT directly change the state of current + + Parameters ----------- score_series : pd.Seires stock_id , score @@ -39,14 +41,13 @@ class BaseStrategy: predict date trade_date : pd.Timestamp trade date - - DO NOT directly change the state of current """ pass def update(self, score_series, pred_date, trade_date): """User can use this method to update strategy state each trade date. - Parameters: + + Parameters ----------- score_series : pd.Series stock_id , score @@ -98,8 +99,9 @@ class AdjustTimer: """AdjustTimer Responsible for timing of position adjusting - This is designed as multiple inheritance mechanism due to + This is designed as multiple inheritance mechanism due to: - the is_adjust may need access to the internel state of a strategy + - it can be reguard as a enhancement to the existing strategy """ @@ -140,21 +142,24 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer): def generate_target_weight_position(self, score, current, trade_date): """ - Parameters: + Generate target position from score for this date and the current position. The cash is not considered in the position + + Parameters ----------- - score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column - current : current position, use Position() class + score : pd.Series + pred score for this trade date, index is stock_id, contain 'score' column + current : Position() + current position trade_exchange : Exchange() - trade_date : trade date - generate target position from score for this date and the current position - The cash is not considered in the position + trade_date : pd.Timestamp + trade
date """ raise NotImplementedError() def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date): """ - Parameters: - ---------- + Parameters + ----------- score_series : pd.Seires stock_id , score current : Position() @@ -188,7 +193,7 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer): class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer): def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs): """ - Parameters: + Parameters ----------- topk : int The number of stocks in the portfolio @@ -229,7 +234,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer): """ Gnererate order list according to score_series at trade_date, will not change current. - Parameters: + Parameters ----------- score_series : pd.Series stock_id , score diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py index c46528944..3dbc17c23 100644 --- a/qlib/data/dataset/__init__.py +++ b/qlib/data/dataset/__init__.py @@ -14,9 +14,11 @@ class Dataset(Serializable): def __init__(self, *args, **kwargs): """ - init is designed to finish following steps + init is designed to finish following steps: + - setup data - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing + - initialize the state of the dataset(info to prepare the data) - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing. 
@@ -29,11 +31,15 @@ class Dataset(Serializable): """ setup the data - We split the setup_data function for following situation - - 1) User have a Dataset object with learned status on disk - - 2) User load the Dataset object from the disk(Note the init function is skiped) - - 3) User call `setup_data` to load new data - - 4) User prepare data for model based on previous status + We split the setup_data function for following situation: + + - User have a Dataset object with learned status on disk + + - User load the Dataset object from the disk (Note the init function is skipped) + + - User call `setup_data` to load new data + + - User prepare data for model based on previous status """ pass @@ -41,8 +47,9 @@ class Dataset(Serializable): """ The type of dataset depends on the model. (It could be pd.DataFrame, pytorch.DataLoader, etc.) The parameters should specify the scope for the prepared data - The method sould + The method should: - process the data + - return the processed data Returns @@ -55,11 +62,12 @@ class Dataset(Serializable): class DatasetH(Dataset): """ - Dataset with Data(H)anler + Dataset with Data(H)andler User should try to put the data preprocessing functions into handler. - Only following data processing functions should be placed in Dataset + Only following data processing functions should be placed in Dataset: - The processing is related to specific model. + - The processing is related to data split """ @@ -81,21 +89,26 @@ class DatasetH(Dataset): Parameters ---------- handler : Union[dict, DataHandler] - handler could be - 1) insntance of `DataHandler` - 2) config of `DataHandler`. Please refer to `DataHandler` + handler could be: + + - instance of `DataHandler` + + - config of `DataHandler`. Please refer to `DataHandler` segments : list Describe the options to segment the data.
- Here are some examples - 1) 'segments': { - 'train': ("2008-01-01", "2014-12-31"), - 'valid': ("2017-01-01", "2020-08-01",), - 'test': ("2015-01-01", "2016-12-31",), - } - 2) 'segments': { - 'insample': ("2008-01-01", "2014-12-31"), - 'outsample': ("2017-01-01", "2020-08-01",), - } + Here are some examples: + + .. code-block:: + + 1) 'segments': { + 'train': ("2008-01-01", "2014-12-31"), + 'valid': ("2017-01-01", "2020-08-01",), + 'test': ("2015-01-01", "2016-12-31",), + } + 2) 'segments': { + 'insample': ("2008-01-01", "2014-12-31"), + 'outsample': ("2017-01-01", "2020-08-01",), + } """ self._handler = init_instance_by_config(handler, accept_types=DataHandler) self._segments = segments.copy() @@ -114,9 +127,11 @@ class DatasetH(Dataset): ---------- segments : Union[List[str], Tuple[str], str, slice] Describe the scope of the data to be prepared - Here are some examples - 1) 'train' - 2) ['train', 'valid'] + Here are some examples: + + - 'train' + + - ['train', 'valid'] col_set : str The col_set will be passed to self._handler when fetching data data_key: str diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index e0a4d809a..4d3d88c38 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -41,7 +41,7 @@ class DataHandler(Serializable): Example of the data: The multi-index of the columns is optional. - .. code-block:: + .. 
code-block:: python feature label $close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0 @@ -109,7 +109,8 @@ class DataHandler(Serializable): Parameters ---------- enable_cache : bool - default value is false + default value is false: + - if `enable_cache` == True: the processed data will be saved on disk, and handler will load the cached data from the disk directly @@ -378,8 +379,10 @@ class DataHandlerLP(DataHandler): init_type : str The type `IT_*` listed above enable_cache : bool - default value is false - if `enable_cache` == True: + default value is false: + + - if `enable_cache` == True: + the processed data will be saved on disk, and handler will load the cached data from the disk directly when we call `init` next time """ diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index e95dc4479..404313e80 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -39,14 +39,16 @@ class DataLoader(abc.ABC): pd.DataFrame: data load from the under layer source - Example of the data: - (The multi-index of the columns is optional.) - feature label - $close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0 - datetime instrument - 2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032 - SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042 - SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289 + Example of the data (The multi-index of the columns is optional.): + + .. 
code-block:: + + feature label + $close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0 + datetime instrument + 2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032 + SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042 + SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289 """ pass @@ -55,7 +57,7 @@ class DLWParser(DataLoader): """ (D)ata(L)oader (W)ith (P)arser for features and names - Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields + Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields. """ def __init__(self, config: Tuple[list, tuple, dict]): @@ -65,14 +67,16 @@ class DLWParser(DataLoader): config : Tuple[list, tuple, dict] Config will be used to describe the fields and column names - := { - "group_name1": - "group_name2": - } - or - := + .. code-block:: YAML - := ["expr", ...] | (["expr", ...], ["col_name", ...]) + := { + "group_name1": + "group_name2": + } + or + := + + := ["expr", ...] | (["expr", ...], ["col_name", ...]) """ self.is_group = isinstance(config, dict)