1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Update doc strings and fix

This commit is contained in:
Jactus
2020-11-25 18:03:52 +08:00
parent 5ac90f25b3
commit b31480a06a
6 changed files with 94 additions and 64 deletions

View File

@@ -30,7 +30,7 @@ task:
module_path: qlib.contrib.model.pytorch_nn
kwargs:
loss: mse
input_dim: 360
input_dim: 158
output_dim: 1
lr: 0.002
lr_decay: 0.96

View File

@@ -190,7 +190,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
Parameters
----------
# backtest workflow related or commmon arguments
- **backtest workflow related or common arguments**
pred : pandas.DataFrame
predict should have a <datetime, instrument> index and one `score` column
account : float
@@ -202,7 +203,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
verbose : bool
whether to print log
# strategy related arguments
- **strategy related arguments**
strategy : Strategy()
strategy used in backtest
topk : int (Default value: 50)
@@ -225,7 +227,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
str_type: 'amount', 'weight' or 'dropout'
strategy type: TopkAmountStrategy, TopkWeightStrategy or TopkDropoutStrategy
# exchange related arguments
- **exchange related arguments**
exchange: Exchange()
pass the exchange for speeding up.
subscribe_fields: list

View File

@@ -26,7 +26,9 @@ class BaseStrategy:
def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
"""
Parameters:
DO NOT directly change the state of current
Parameters
-----------
score_series : pd.Series
stock_id , score
@@ -39,14 +41,13 @@ class BaseStrategy:
predict date
trade_date : pd.Timestamp
trade date
DO NOT directly change the state of current
"""
pass
def update(self, score_series, pred_date, trade_date):
"""User can use this method to update strategy state each trade date.
Parameters:
Parameters
-----------
score_series : pd.Series
stock_id , score
@@ -98,8 +99,9 @@ class AdjustTimer:
"""AdjustTimer
Responsible for timing of position adjusting
This is designed as multiple inheritance mechanism due to
This is designed as multiple inheritance mechanism due to:
- the is_adjust may need access to the internal state of a strategy
- it can be regarded as an enhancement to the existing strategy
"""
@@ -140,21 +142,24 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):
def generate_target_weight_position(self, score, current, trade_date):
"""
Parameters:
Generate target position from score for this date and the current position. The cash is not considered in the position.
Parameters
-----------
score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
current : current position, use Position() class
score : pd.Series
pred score for this trade date, index is stock_id, contain 'score' column
current : Position()
current position
trade_exchange : Exchange()
trade_date : trade date
generate target position from score for this date and the current position
The cash is not considered in the position
trade_date : pd.Timestamp
trade date
"""
raise NotImplementedError()
def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
"""
Parameters:
----------
Parameters
-----------
score_series : pd.Series
stock_id , score
current : Position()
@@ -188,7 +193,7 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):
class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs):
"""
Parameters:
Parameters
-----------
topk : int
The number of stocks in the portfolio
@@ -229,7 +234,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
"""
Generate order list according to score_series at trade_date, will not change current.
Parameters:
Parameters
-----------
score_series : pd.Series
stock_id , score

View File

@@ -14,9 +14,11 @@ class Dataset(Serializable):
def __init__(self, *args, **kwargs):
"""
init is designed to finish following steps
init is designed to finish following steps:
- setup data
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
- initialize the state of the dataset(info to prepare the data)
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
@@ -29,11 +31,15 @@ class Dataset(Serializable):
"""
setup the data
We split the setup_data function for following situation
- 1) User have a Dataset object with learned status on disk
- 2) User load the Dataset object from the disk(Note the init function is skiped)
- 3) User call `setup_data` to load new data
- 4) User prepare data for model based on previous status
We split the setup_data function for following situation:
- User has a Dataset object with learned status on disk
- User loads the Dataset object from the disk (note that the init function is skipped)
- User calls `setup_data` to load new data
- User prepares data for model based on previous status
"""
pass
@@ -41,8 +47,9 @@ class Dataset(Serializable):
"""
The type of dataset depends on the model. (It could be pd.DataFrame, pytorch.DataLoader, etc.)
The parameters should specify the scope for the prepared data
The method sould
The method should:
- process the data
- return the processed data
Returns
@@ -55,11 +62,12 @@ class Dataset(Serializable):
class DatasetH(Dataset):
"""
Dataset with Data(H)anler
Dataset with Data(H)andler
User should try to put the data preprocessing functions into handler.
Only following data processing functions should be placed in Dataset
Only following data processing functions should be placed in Dataset:
- The processing is related to specific model.
- The processing is related to data split
"""
@@ -81,21 +89,26 @@ class DatasetH(Dataset):
Parameters
----------
handler : Union[dict, DataHandler]
handler could be
1) insntance of `DataHandler`
2) config of `DataHandler`. Please refer to `DataHandler`
handler could be:
- instance of `DataHandler`
- config of `DataHandler`. Please refer to `DataHandler`
segments : list
Describe the options to segment the data.
Here are some examples
1) 'segments': {
'train': ("2008-01-01", "2014-12-31"),
'valid': ("2017-01-01", "2020-08-01",),
'test': ("2015-01-01", "2016-12-31",),
}
2) 'segments': {
'insample': ("2008-01-01", "2014-12-31"),
'outsample': ("2017-01-01", "2020-08-01",),
}
Here are some examples:
.. code-block::
1) 'segments': {
'train': ("2008-01-01", "2014-12-31"),
'valid': ("2017-01-01", "2020-08-01",),
'test': ("2015-01-01", "2016-12-31",),
}
2) 'segments': {
'insample': ("2008-01-01", "2014-12-31"),
'outsample': ("2017-01-01", "2020-08-01",),
}
"""
self._handler = init_instance_by_config(handler, accept_types=DataHandler)
self._segments = segments.copy()
@@ -114,9 +127,11 @@ class DatasetH(Dataset):
----------
segments : Union[List[str], Tuple[str], str, slice]
Describe the scope of the data to be prepared
Here are some examples
1) 'train'
2) ['train', 'valid']
Here are some examples:
- 'train'
- ['train', 'valid']
col_set : str
The col_set will be passed to self._handler when fetching data
data_key: str

View File

@@ -41,7 +41,7 @@ class DataHandler(Serializable):
Example of the data:
The multi-index of the columns is optional.
.. code-block::
.. code-block:: python
feature label
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
@@ -109,7 +109,8 @@ class DataHandler(Serializable):
Parameters
----------
enable_cache : bool
default value is false
default value is false:
- if `enable_cache` == True:
the processed data will be saved on disk, and handler will load the cached data from the disk directly
@@ -378,8 +379,10 @@ class DataHandlerLP(DataHandler):
init_type : str
The type `IT_*` listed above
enable_cache : bool
default value is false
if `enable_cache` == True:
default value is false:
- if `enable_cache` == True:
the processed data will be saved on disk, and handler will load the cached data from the disk directly
when we call `init` next time
"""

View File

@@ -39,14 +39,16 @@ class DataLoader(abc.ABC):
pd.DataFrame:
data loaded from the underlying source
Example of the data:
(The multi-index of the columns is optional.)
feature label
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
datetime instrument
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
Example of the data (The multi-index of the columns is optional.):
.. code-block::
feature label
$close $volume Ref($close, 1) Mean($close, 3) $high-$low LABEL0
datetime instrument
2010-01-04 SH600000 81.807068 17145150.0 83.737389 83.016739 2.741058 0.0032
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
"""
pass
@@ -55,7 +57,7 @@ class DLWParser(DataLoader):
"""
(D)ata(L)oader (W)ith (P)arser for features and names
Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields
Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields.
"""
def __init__(self, config: Tuple[list, tuple, dict]):
@@ -65,14 +67,16 @@ class DLWParser(DataLoader):
config : Tuple[list, tuple, dict]
Config will be used to describe the fields and column names
<config> := {
"group_name1": <fields_info1>
"group_name2": <fields_info2>
}
or
<config> := <fields_info>
.. code-block:: YAML
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
<config> := {
"group_name1": <fields_info1>
"group_name2": <fields_info2>
}
or
<config> := <fields_info>
<fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
"""
self.is_group = isinstance(config, dict)