1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-05 03:50:57 +08:00

Update docs and fix tabnet

This commit is contained in:
Jactus
2020-11-26 00:55:26 +08:00
parent 5be847909f
commit 87cee85cea
27 changed files with 624 additions and 495 deletions

View File

@@ -17,7 +17,7 @@ class Dataset(Serializable):
init is designed to finish following steps:
- setup data
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing.
- initialize the state of the dataset(info to prepare the data)
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
@@ -29,17 +29,17 @@ class Dataset(Serializable):
def setup_data(self, *args, **kwargs):
"""
setup the data
Setup the data.
We split the setup_data function for following situation:
- User have a Dataset object with learned status on disk
- User has a Dataset object with learned status on disk.
- User load the Dataset object from the disk(Note the init function is skiped)
- User loads the Dataset object from the disk (note that the init function is skipped).
- User call `setup_data` to load new data
- User calls `setup_data` to load new data.
- User prepare data for model based on previous status
- User prepares data for model based on previous status.
"""
pass
@@ -66,9 +66,10 @@ class DatasetH(Dataset):
User should try to put the data preprocessing functions into handler.
Only following data processing functions should be placed in Dataset:
- The processing is related to specific model.
- The processing is related to data split
- The processing is related to data split.
"""
def __init__(self, handler: Union[dict, DataHandler], segments: list):
@@ -76,15 +77,15 @@ class DatasetH(Dataset):
Parameters
----------
handler : Union[dict, DataHandler]
handler will be passed into setup_data
handler will be passed into setup_data.
segments : list
handler will be passed into setup_data
segments will be passed into setup_data.
"""
super().__init__(handler, segments)
def setup_data(self, handler: Union[dict, DataHandler], segments: list):
"""
setup the underlying data
Setup the underlying data.
Parameters
----------
@@ -121,7 +122,7 @@ class DatasetH(Dataset):
**kwargs,
) -> Union[List[pd.DataFrame], pd.DataFrame]:
"""
prepare the data for learning and inference
Prepare the data for learning and inference.
Parameters
----------
@@ -132,11 +133,12 @@ class DatasetH(Dataset):
- 'train'
- ['train', 'valid']
col_set : str
The col_set will be passed to self._handler when fetching data
data_key: str
The col_set will be passed to self._handler when fetching data.
data_key : str
The data to fetch: DK_*
Default is DK_I, which indicate fetching data for **inference**
Default is DK_I, which indicates fetching data for **inference**.
Returns
-------