From 635632e4ed59facc412af0912f4e0f087942a4d2 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 25 Jan 2022 11:28:23 +0800 Subject: [PATCH] Update handler processors docs (#879) * Update handler.py * Update handler.py * Update handler.py --- qlib/data/dataset/handler.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 0547ef41a..a6877f013 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -333,7 +333,18 @@ class DataHandlerLP(DataHandler): """ DataHandler with **(L)earnable (P)rocessor** - Tips to improving the performance of data handler + This handler will produce three pieces of data in pd.DataFrame format. + - DK_R / self._data: the raw data loaded from the loader + - DK_I / self._infer: the data processed for inference + - DK_L / self._learn: the data processed for learning the model. + + The motivation for using different processor workflows for learning and inference: + Here are some examples. + - The instrument universe for learning and inference may be different. + - The processing of some samples may rely on the label (for example, some samples that hit the limit may need extra processing or be dropped). + These processors only apply to the learning phase. + + Tips to improve the performance of data handler - To reduce the memory cost - `drop_raw=True`: this will modify the data inplace on raw data; """