mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Update doc strings and fix
This commit is contained in:
@@ -30,7 +30,7 @@ task:
|
||||
module_path: qlib.contrib.model.pytorch_nn
|
||||
kwargs:
|
||||
loss: mse
|
||||
input_dim: 360
|
||||
input_dim: 158
|
||||
output_dim: 1
|
||||
lr: 0.002
|
||||
lr_decay: 0.96
|
||||
|
||||
@@ -190,7 +190,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
|
||||
Parameters
|
||||
----------
|
||||
|
||||
# backtest workflow related or commmon arguments
|
||||
- **backtest workflow related or common arguments**
|
||||
|
||||
pred : pandas.DataFrame
|
||||
predict should have <datetime, instrument> index and one `score` column
|
||||
account : float
|
||||
@@ -202,7 +203,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
|
||||
verbose : bool
|
||||
whether to print log
|
||||
|
||||
# strategy related arguments
|
||||
- **strategy related arguments**
|
||||
|
||||
strategy : Strategy()
|
||||
strategy used in backtest
|
||||
topk : int (Default value: 50)
|
||||
@@ -225,7 +227,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
|
||||
str_type: 'amount', 'weight' or 'dropout'
|
||||
strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy
|
||||
|
||||
# exchange related arguments
|
||||
- **exchange related arguments**
|
||||
|
||||
exchange: Exchange()
|
||||
pass the exchange for speeding up.
|
||||
subscribe_fields: list
|
||||
|
||||
@@ -26,7 +26,9 @@ class BaseStrategy:
|
||||
|
||||
def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
|
||||
"""
|
||||
Parameters:
|
||||
DO NOT directly change the state of current
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
score_series : pd.Series
|
||||
stock_id , score
|
||||
@@ -39,14 +41,13 @@ class BaseStrategy:
|
||||
predict date
|
||||
trade_date : pd.Timestamp
|
||||
trade date
|
||||
|
||||
DO NOT directly change the state of current
|
||||
"""
|
||||
pass
|
||||
|
||||
def update(self, score_series, pred_date, trade_date):
|
||||
"""User can use this method to update strategy state each trade date.
|
||||
Parameters:
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
score_series : pd.Series
|
||||
stock_id , score
|
||||
@@ -98,8 +99,9 @@ class AdjustTimer:
|
||||
"""AdjustTimer
|
||||
Responsible for timing of position adjusting
|
||||
|
||||
This is designed as multiple inheritance mechanism due to
|
||||
This is designed as multiple inheritance mechanism due to:
|
||||
- the is_adjust may need access to the internal state of a strategy
|
||||
|
||||
- it can be regarded as an enhancement to the existing strategy
|
||||
"""
|
||||
|
||||
@@ -140,21 +142,24 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):
|
||||
|
||||
def generate_target_weight_position(self, score, current, trade_date):
|
||||
"""
|
||||
Parameters:
|
||||
Generate target position from score for this date and the current position. The cash is not considered in the position
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
|
||||
current : current position, use Position() class
|
||||
score : pd.Series
|
||||
pred score for this trade date, index is stock_id, contain 'score' column
|
||||
current : Position()
|
||||
current position
|
||||
trade_exchange : Exchange()
|
||||
trade_date : trade date
|
||||
generate target position from score for this date and the current position
|
||||
The cash is not considered in the position
|
||||
trade_date : pd.Timestamp
|
||||
trade date
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
|
||||
"""
|
||||
Parameters:
|
||||
----------
|
||||
Parameters
|
||||
-----------
|
||||
score_series : pd.Series
|
||||
stock_id , score
|
||||
current : Position()
|
||||
@@ -188,7 +193,7 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):
|
||||
class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
|
||||
def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs):
|
||||
"""
|
||||
Parameters:
|
||||
Parameters
|
||||
-----------
|
||||
topk : int
|
||||
The number of stocks in the portfolio
|
||||
@@ -229,7 +234,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
|
||||
"""
|
||||
Generate order list according to score_series at trade_date, will not change current.
|
||||
|
||||
Parameters:
|
||||
Parameters
|
||||
-----------
|
||||
score_series : pd.Series
|
||||
stock_id , score
|
||||
|
||||
@@ -14,9 +14,11 @@ class Dataset(Serializable):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
init is designed to finish following steps
|
||||
init is designed to finish following steps:
|
||||
|
||||
- setup data
|
||||
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
|
||||
|
||||
- initialize the state of the dataset(info to prepare the data)
|
||||
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
|
||||
|
||||
@@ -29,11 +31,15 @@ class Dataset(Serializable):
|
||||
"""
|
||||
setup the data
|
||||
|
||||
We split the setup_data function for following situation
|
||||
- 1) User have a Dataset object with learned status on disk
|
||||
- 2) User load the Dataset object from the disk(Note the init function is skiped)
|
||||
- 3) User call `setup_data` to load new data
|
||||
- 4) User prepare data for model based on previous status
|
||||
We split the setup_data function for following situation:
|
||||
|
||||
- User have a Dataset object with learned status on disk
|
||||
|
||||
- User load the Dataset object from the disk (Note the init function is skipped)
|
||||
|
||||
- User call `setup_data` to load new data
|
||||
|
||||
- User prepare data for model based on previous status
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -41,8 +47,9 @@ class Dataset(Serializable):
|
||||
"""
|
||||
The type of dataset depends on the model. (It could be pd.DataFrame, pytorch.DataLoader, etc.)
|
||||
The parameters should specify the scope for the prepared data
|
||||
The method sould
|
||||
The method should:
|
||||
- process the data
|
||||
|
||||
- return the processed data
|
||||
|
||||
Returns
|
||||
@@ -55,11 +62,12 @@ class Dataset(Serializable):
|
||||
|
||||
class DatasetH(Dataset):
|
||||
"""
|
||||
Dataset with Data(H)anler
|
||||
Dataset with Data(H)andler
|
||||
|
||||
User should try to put the data preprocessing functions into handler.
|
||||
Only following data processing functions should be placed in Dataset
|
||||
Only following data processing functions should be placed in Dataset:
|
||||
- The processing is related to specific model.
|
||||
|
||||
- The processing is related to data split
|
||||
"""
|
||||
|
||||
@@ -81,21 +89,26 @@ class DatasetH(Dataset):
|
||||
Parameters
|
||||
----------
|
||||
handler : Union[dict, DataHandler]
|
||||
handler could be
|
||||
1) insntance of `DataHandler`
|
||||
2) config of `DataHandler`. Please refer to `DataHandler`
|
||||
handler could be:
|
||||
|
||||
- instance of `DataHandler`
|
||||
|
||||
- config of `DataHandler`. Please refer to `DataHandler`
|
||||
segments : list
|
||||
Describe the options to segment the data.
|
||||
Here are some examples
|
||||
1) 'segments': {
|
||||
'train': ("2008-01-01", "2014-12-31"),
|
||||
'valid': ("2017-01-01", "2020-08-01",),
|
||||
'test': ("2015-01-01", "2016-12-31",),
|
||||
}
|
||||
2) 'segments': {
|
||||
'insample': ("2008-01-01", "2014-12-31"),
|
||||
'outsample': ("2017-01-01", "2020-08-01",),
|
||||
}
|
||||
Here are some examples:
|
||||
|
||||
.. code-block::
|
||||
|
||||
1) 'segments': {
|
||||
'train': ("2008-01-01", "2014-12-31"),
|
||||
'valid': ("2017-01-01", "2020-08-01",),
|
||||
'test': ("2015-01-01", "2016-12-31",),
|
||||
}
|
||||
2) 'segments': {
|
||||
'insample': ("2008-01-01", "2014-12-31"),
|
||||
'outsample': ("2017-01-01", "2020-08-01",),
|
||||
}
|
||||
"""
|
||||
self._handler = init_instance_by_config(handler, accept_types=DataHandler)
|
||||
self._segments = segments.copy()
|
||||
@@ -114,9 +127,11 @@ class DatasetH(Dataset):
|
||||
----------
|
||||
segments : Union[List[str], Tuple[str], str, slice]
|
||||
Describe the scope of the data to be prepared
|
||||
Here are some examples
|
||||
1) 'train'
|
||||
2) ['train', 'valid']
|
||||
Here are some examples:
|
||||
|
||||
- 'train'
|
||||
|
||||
- ['train', 'valid']
|
||||
col_set : str
|
||||
The col_set will be passed to self._handler when fetching data
|
||||
data_key: str
|
||||
|
||||
@@ -41,7 +41,7 @@ class DataHandler(Serializable):
|
||||
Example of the data:
|
||||
The multi-index of the columns is optional.
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: python
|
||||
|
||||
feature label
|
||||
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
|
||||
@@ -109,7 +109,8 @@ class DataHandler(Serializable):
|
||||
Parameters
|
||||
----------
|
||||
enable_cache : bool
|
||||
default value is false
|
||||
default value is false:
|
||||
|
||||
- if `enable_cache` == True:
|
||||
|
||||
the processed data will be saved on disk, and handler will load the cached data from the disk directly
|
||||
@@ -378,8 +379,10 @@ class DataHandlerLP(DataHandler):
|
||||
init_type : str
|
||||
The type `IT_*` listed above
|
||||
enable_cache : bool
|
||||
default value is false
|
||||
if `enable_cache` == True:
|
||||
default value is false:
|
||||
|
||||
- if `enable_cache` == True:
|
||||
|
||||
the processed data will be saved on disk, and handler will load the cached data from the disk directly
|
||||
when we call `init` next time
|
||||
"""
|
||||
|
||||
@@ -39,14 +39,16 @@ class DataLoader(abc.ABC):
|
||||
pd.DataFrame:
|
||||
data load from the under layer source
|
||||
|
||||
Example of the data:
|
||||
(The multi-index of the columns is optional.)
|
||||
feature label
|
||||
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
|
||||
datetime instrument
|
||||
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
|
||||
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
|
||||
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
|
||||
Example of the data (The multi-index of the columns is optional.):
|
||||
|
||||
.. code-block::
|
||||
|
||||
feature label
|
||||
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
|
||||
datetime instrument
|
||||
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
|
||||
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
|
||||
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -55,7 +57,7 @@ class DLWParser(DataLoader):
|
||||
"""
|
||||
(D)ata(L)oader (W)ith (P)arser for features and names
|
||||
|
||||
Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields
|
||||
Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields.
|
||||
"""
|
||||
|
||||
def __init__(self, config: Tuple[list, tuple, dict]):
|
||||
@@ -65,14 +67,16 @@ class DLWParser(DataLoader):
|
||||
config : Tuple[list, tuple, dict]
|
||||
Config will be used to describe the fields and column names
|
||||
|
||||
<config> := {
|
||||
"group_name1": <fields_info1>
|
||||
"group_name2": <fields_info2>
|
||||
}
|
||||
or
|
||||
<config> := <fields_info>
|
||||
.. code-block:: YAML
|
||||
|
||||
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
|
||||
<config> := {
|
||||
"group_name1": <fields_info1>
|
||||
"group_name2": <fields_info2>
|
||||
}
|
||||
or
|
||||
<config> := <fields_info>
|
||||
|
||||
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
|
||||
"""
|
||||
self.is_group = isinstance(config, dict)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user