Add analyser example and finetune example

add IC and rank IC
monitor initial version
2026-07-27 22:12:47 +08:00 · 2021-06-06 07:51:52 +00:00 · 2021-06-06 07:43:26 +00:00 · 2021-06-06 07:43:26 +00:00 · 2021-05-31 08:52:41 +08:00 · 2021-05-30 09:44:47 +08:00
219 changed files with 16673 additions and 2861 deletions
--- a/.deepsource.toml
+++ b/.deepsource.toml
@@ -0,0 +1,12 @@
+version = 1
+
+test_patterns = ["tests/test_*.py"]
+
+exclude_patterns = ["examples/**"]
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+  [analyzers.meta]
+  runtime_version = "3.x.x"
--- a/.github/ISSUE_TEMPLATE/bug-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -28,7 +28,8 @@ Steps to reproduce the behavior:

 ## Environment

-**Note**: One could run `python scripts/collect_info.py` under the `qlib` directory to get the following information.
+**Note**: Users can run `cd scripts && python collect_info.py all` under the project directory to get system information
+and paste it here directly.

 - Qlib version:
 - Python version:
@@ -37,4 +38,4 @@ Steps to reproduce the behavior:

 ## Additional Notes

-<!-- Add any other information about the problem here. -->
+<!-- Add any other information about the problem here. -->
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,24 @@
+name: Mark stale issues and pull requests
+
+on:
+  schedule:
+  - cron: "0 0/3 * * *"
+
+jobs:
+  stale:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/stale@v3
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-issue-message: 'This issue is stale because it has been open for three months with no activity. Remove the stale label or comment on the issue otherwise this will be closed in 5 days'
+        stale-pr-message: 'This PR is stale because it has been open for a year with no activity. Remove the stale label or comment on the PR otherwise this will be closed in 5 days'
+        stale-issue-label: 'stale'
+        stale-pr-label: 'stale'
+        days-before-stale: 90
+        days-before-close: 5
+        operations-per-run: 100
+        exempt-issue-labels: 'bug,enhancement'
+        remove-stale-when-updated: true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -12,8 +12,8 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-16.04, ubuntu-18.04, macos-latest]
-        python-version: [3.6, 3.7, 3.8]
+        os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest]
+        python-version: [3.6, 3.7, 3.8, 3.9]

    steps:
    - uses: actions/checkout@v2
@@ -23,37 +23,98 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}

-    - name: Install dependencies
+    - name: Lint with Black
      run: |
-        pip install --upgrade cython
-        pip install numpy jupyter jupyter_contrib_nbextensions
-        python setup.py install
+        cd ..
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install black
+          $CONDA\\python.exe -m black qlib -l 120 --check --diff
+        else
+          sudo $CONDA/bin/python -m pip install black
+          $CONDA/bin/python -m black qlib -l 120 --check --diff
+        fi
+      shell: bash

+    # Test Qlib installed with pip
+    - name: Install Qlib with pip
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install numpy==1.19.5
+          $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml numpy --user
+        else
+          sudo $CONDA/bin/python -m pip install numpy==1.19.5
+          sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy
+        fi
+      shell: bash
+    
    - name: Install Lightgbm for MacOS
      if: runner.os == 'macOS'
      run: |
        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
        HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm

+    - name: Test data downloads
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        else
+          $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        fi
+      shell: bash
+
+    - name: Test workflow by config (install from pip)
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml 
+          $CONDA\\python.exe -m pip uninstall -y pyqlib
+        else
+          $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml 
+          sudo $CONDA/bin/python -m pip uninstall -y pyqlib
+        fi
+      shell: bash
+      
+    # Test Qlib installed from source
+    - name: Install Qlib from source
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install --upgrade cython
+          $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions
+          $CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while installing locally won't
+          $CONDA\\python.exe setup.py install
+        else
+          sudo $CONDA/bin/python -m pip install --upgrade cython
+          sudo $CONDA/bin/python -m pip install numpy jupyter jupyter_contrib_nbextensions
+          sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while installing locally won't
+          sudo $CONDA/bin/python setup.py install
+        fi
+      shell: bash
+
    - name: Install test dependencies
      run: |
-        python -m pip install --upgrade pip
-        pip install black pytest
-
-    - name: Lint with Black
-      run: |
-        cd ..
-        python -m black qlib -l 120 --check --diff
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install --upgrade pip
+          $CONDA\\python.exe -m pip install black pytest
+        else
+          sudo $CONDA/bin/python -m pip install --upgrade pip
+          sudo $CONDA/bin/python -m pip install black pytest
+        fi
+      shell: bash 

    - name: Unit tests with Pytest
      run: |
        cd tests
-        pytest . --durations=0
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pytest . --durations=0
+        else
+          $CONDA/bin/python -m pytest . --durations=0
+        fi
+      shell: bash

-    - name: Test data downloads
+    - name: Test workflow by config (install from source)
      run: |
-        python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
-
-    - name: Test workflow by config
-      run: |
-        qrun examples/benchmarks/LightGBM/workflow_config_lightgbm.yaml 
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml 
+        else
+          $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml 
+        fi
+      shell: bash
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/

 *.pyc
+*.pyd
 *.so
 *.ipynb
 .ipynb_checkpoints
@@ -33,3 +34,7 @@ tags

 .pytest_cache/
 .vscode/
+
+*.swp
+
+./pretrain
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -0,0 +1,21 @@
+# .readthedocs.yml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+
+# Build all formats
+formats: all
+
+# Optionally set the version of Python and requirements required to build your docs
+python:
+  version: 3.7
+  install:
+    - requirements: docs/requirements.txt
+    - method: setuptools
+      path: .
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -114,7 +114,7 @@ Version 0.4.1
 Version 0.4.2
 --------------------
 - Refactor DataHandler
- Add ``ALPHA360`` DataHandler
+- Add ``Alpha360`` DataHandler


 Version 0.4.3
--- a/README.md
+++ b/README.md
@@ -7,6 +7,20 @@
 [![License](https://img.shields.io/pypi/l/pyqlib)](LICENSE)
 [![Join the chat at https://gitter.im/Microsoft/qlib](https://badges.gitter.im/Microsoft/qlib.svg)](https://gitter.im/Microsoft/qlib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

+## :newspaper: **What's NEW!** &nbsp;   :sparkling_heart: 
+Recent released features
+| Feature | Status |
+| --                      | ------    |
+| Online serving and automatic model rolling | :star: [Released](https://github.com/microsoft/qlib/pull/290) on May 17, 2021 | 
+| DoubleEnsemble Model | [Released](https://github.com/microsoft/qlib/pull/286) on Mar 2, 2021 | 
+| High-frequency data processing example | [Released](https://github.com/microsoft/qlib/pull/257) on Feb 5, 2021  |
+| High-frequency trading example | [Part of code released](https://github.com/microsoft/qlib/pull/227) on Jan 28, 2021  | 
+| High-frequency data(1min) | [Released](https://github.com/microsoft/qlib/pull/221) on Jan 27, 2021 |
+| Tabnet Model | [Released](https://github.com/microsoft/qlib/pull/205) on Jan 22, 2021 | 
+
+Features released before 2021 are not listed here.
+
+

 <p align="center">
  <img src="http://fintech.msra.cn/images_v060/logo/1.png" />
@@ -17,10 +31,11 @@ Qlib is an AI-oriented quantitative investment platform, which aims to realize t

 It contains the full ML pipeline of data processing, model training, back-testing; and covers the entire chain of quantitative investment: alpha seeking, risk modeling, portfolio optimization, and order execution. 

-With Qlib, user can easily try ideas to create better Quant investment strategies.
+With Qlib, users can easily try ideas to create better Quant investment strategies.

 For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative Investment Platform"](https://arxiv.org/abs/2009.11189).

+- [**Plans**](#plans)
 - [Framework of Qlib](#framework-of-qlib)
 - [Quick Start](#quick-start)
  - [Installation](#installation)
@@ -34,9 +49,21 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
 - [More About Qlib](#more-about-qlib)
 - [Offline Mode and Online Mode](#offline-mode-and-online-mode)
  - [Performance of Qlib Data Server](#performance-of-qlib-data-server)
+- [Related Reports](#related-reports)
+- [Contact Us](#contact-us)
 - [Contributing](#contributing)


+# Plans
+New features under development (ordered by estimated release time).
+Your feedback about these features is very important.
+| Feature                        | Status      |
+| --                      | ------    |
+| Planning-based portfolio optimization | Under review:  https://github.com/microsoft/qlib/pull/280 | 
+| Fund data supporting and analysis  |  Under review: https://github.com/microsoft/qlib/pull/292 |
+| Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 |
+| High-frequency trading | Under review: https://github.com/microsoft/qlib/pull/408 | 
+| Meta-Learning-based data selection | Initial opensource version under development |

 # Framework of Qlib

@@ -45,11 +72,11 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
 </div>


-At the module level, Qlib is a platform that consists of the above components. The components are designed as loose-coupled modules and each component could be used stand-alone.
+At the module level, Qlib is a platform that consists of the above components. The components are designed as loose-coupled modules, and each component could be used stand-alone.

 | Name                   | Description                                                                                                                                                                                                                                                                                                                                                             |
 | ------                 | -----                                                                                                                                                                                                                                                                                                                                                                   |
-| `Infrastructure` layer | `Infrastructure` layer provides underlying support for Quant research. `DataServer` provides high-performance infrastructure for users to manage and retrieve raw data. `Trainer` provides flexible interface to control the training process of models which enable algorithms controlling the training process.                                                       |
+| `Infrastructure` layer | `Infrastructure` layer provides underlying support for Quant research. `DataServer` provides a high-performance infrastructure for users to manage and retrieve raw data. `Trainer` provides a flexible interface to control the training process of models, which enable algorithms to control the training process.                                                       |
 | `Workflow` layer       | `Workflow` layer covers the whole workflow of quantitative investment. `Information Extractor` extracts data for models. `Forecast Model` focuses on producing all kinds of forecast signals (e.g. _alpha_, risk) for other modules. With these signals `Portfolio Generator` will generate the target portfolio and produce orders to be executed by `Order Executor`. |
 | `Interface` layer      | `Interface` layer tries to present a user-friendly interface for the underlying system. `Analyser` module will provide users detailed analysis reports of forecasting signals, portfolios and execution results                                                                                                                                                                 |

@@ -61,17 +88,36 @@ At the module level, Qlib is a platform that consists of the above components. T

 This quick start guide tries to demonstrate
 1. It's very easy to build a complete Quant research workflow and try your ideas with _Qlib_.
-1. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
+2. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
+
+Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** that shows how to install ``Qlib`` and run LightGBM with ``qrun``. **But**, please make sure you have already prepared the data following the [instructions](#data-preparation).
+

 ## Installation

-Users can easily install ``Qlib`` by pip according to the following command
+This table demonstrates the supported Python version of `Qlib`:
+|               | install with pip           | install from source  | plot |
+| ------------- |:---------------------:|:--------------------:|:----:|
+| Python 3.6    | :heavy_check_mark:    | :heavy_check_mark: (only with `Anaconda`)                  | :heavy_check_mark: |
+| Python 3.7    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
+| Python 3.8    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
+| Python 3.9    | :x:                   | :heavy_check_mark:   | :x: |
+
+**Note**: 
+1. Please note that installing cython in Python 3.6 will raise errors when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
+2. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtests, and plotting most of the related figures (those included in the [notebook](examples/workflow_by_code.ipynb)). However, plotting the *model performance* is not supported for now, and we will fix this when the dependent packages are upgraded in the future.
+
+### Install with pip
+Users can easily install ``Qlib`` by pip according to the following command.

 ```bash
  pip install pyqlib
 ```

-Also, users can install ``Qlib`` by the source code according to the following steps:
+**Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch, please install qlib with the methods below.
+
+### Install from source
+Also, users can install the latest dev version ``Qlib`` by the source code according to the following steps:

 * Before installing ``Qlib`` from source, users need to install some dependencies:

@@ -80,25 +126,38 @@ Also, users can install ``Qlib`` by the source code according to the following s
  pip install --upgrade  cython
  ```

-* Clone the repository and install ``Qlib``:
-
-  ```bash
-  git clone https://github.com/microsoft/qlib.git && cd qlib
-  python setup.py install
-  ```
+* Clone the repository and install ``Qlib`` as follows.
+  * If you haven't installed qlib by the command ``pip install pyqlib`` before:
+    ```bash
+    git clone https://github.com/microsoft/qlib.git && cd qlib
+    python setup.py install
+    ```
+  * If you have already installed the stable version by the command ``pip install pyqlib``:
+    ```bash
+    git clone https://github.com/microsoft/qlib.git && cd qlib
+    pip install .
+    ```
+  **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.

+**Tips**: If you fail to install `Qlib` or run the examples in your environment,  comparing your steps and the [CI workflow](.github/workflows/test.yml) may help you find the problem.

 ## Data Preparation
 Load and prepare data by running the following code:
  ```bash
+  # get 1d data
  python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
+
+  # get 1min data
+  python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min
+
  ```

 This dataset is created by public data collected by [crawler scripts](scripts/data_collector/), which have been released in
 the same repository.
 Users could create the same dataset with it. 

-*Please pay **ATTENTION** that the data is collected from [Yahoo Finance](https://finance.yahoo.com/lookup) and the data might not be perfect. We recommend users to prepare their own data if they have high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)*.
+*Please pay **ATTENTION** that the data is collected from [Yahoo Finance](https://finance.yahoo.com/lookup), and the data might not be perfect.
+We recommend users to prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)*.

 <!-- 
 - Run the initialization code and get stock data:
@@ -130,12 +189,16 @@ Users could create the same dataset with it.
 ## Auto Quant Research Workflow
 Qlib provides a tool named `qrun` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 

-1. Quant Research Workflow: Run  `qrun` with lightgbm workflow config ([workflow_config_lightgbm.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm.yaml)) as following.
+1. Quant Research Workflow: Run  `qrun` with lightgbm workflow config ([workflow_config_lightgbm_Alpha158.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml)) as follows.
    ```bash
      cd examples  # Avoid running program under the directory contains `qlib`
-      qrun benchmarks/LightGBM/workflow_config_lightgbm.yaml
+      qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
    ```
-    The result of `qrun` is as follows, please refer to please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. 
+    If users want to use `qrun` under debug mode, please use the following command:
+    ```bash
+    python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+    ```
+    The result of `qrun` is as follows, please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. 

    ```bash

@@ -153,9 +216,6 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
    annualized_return  0.128982
    information_ratio  1.444287
    max_drawdown      -0.091078
-
-
-
    ```
    Here are detailed documents for `qrun` and [workflow](https://qlib.readthedocs.io/en/latest/component/workflow.html).

@@ -185,40 +245,45 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
      - Rank Label
      ![Rank Label](docs/_static/img/rank_label.png)
      -->
+   - [Explanation](https://qlib.readthedocs.io/en/latest/component/report.html) of above results

 ## Building Customized Quant Research Workflow by Code
-The automatic workflow may not suite the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.
+The automatic workflow may not suit the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.


 # [Quant Model Zoo](examples/benchmarks)

 Here is a list of models built on `Qlib`.
- [GBDT based on LightGBM](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost](qlib/contrib/model/catboost_model.py)
- [GBDT based on XGBoost](qlib/contrib/model/xgboost.py)
+- [GBDT based on XGBoost (Tianqi Chen, et al. 2016)](qlib/contrib/model/xgboost.py)
+- [GBDT based on LightGBM (Guolin Ke, et al. 2017)](qlib/contrib/model/gbdt.py)
+- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. 2017)](qlib/contrib/model/catboost_model.py)
 - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
- [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
- [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py)
- [ALSTM based on pytorcn](qlib/contrib/model/pytorch_alstm.py)
- [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
- [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
-<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
+- [LSTM based on pytorch (Sepp Hochreiter, et al. 1997)](qlib/contrib/model/pytorch_lstm.py)
+- [GRU based on pytorch (Kyunghyun Cho, et al. 2014)](qlib/contrib/model/pytorch_gru.py)
+- [ALSTM based on pytorch (Yao Qin, et al. 2017)](qlib/contrib/model/pytorch_alstm.py)
+- [GATs based on pytorch (Petar Velickovic, et al. 2017)](qlib/contrib/model/pytorch_gats.py)
+- [SFM based on pytorch (Liheng Zhang, et al. 2017)](qlib/contrib/model/pytorch_sfm.py)
+- [TFT based on tensorflow (Bryan Lim, et al. 2019)](examples/benchmarks/TFT/tft.py)
+- [TabNet based on pytorch (Sercan O. Arik, et al. 2019)](qlib/contrib/model/pytorch_tabnet.py)
+- [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. 2020)](qlib/contrib/model/double_ensemble.py)

 Your PR of new Quant models is highly welcomed.

+The performance of each model on the `Alpha158` and `Alpha360` dataset can be found [here](examples/benchmarks/README.md).
+
 ## Run a single model
 All the models listed above are runnable with ``Qlib``. Users can find the config files we provide and some details about the model through the [benchmarks](examples/benchmarks) folder. More information can be retrieved at the model files listed above.

 `Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
- User can use the tool `qrun` mentioned above to run a model's workflow based from a config file.
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
+- Users can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
+- Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.

- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found  in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
+- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found  in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).

 ## Run multiple models
-`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supprots *Linux* now. Other OS will be supported in the future.)
+`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* for now. Other operating systems will be supported in the future. Besides, it doesn't support running the same model multiple times in parallel either; this will also be fixed in future development.)

-The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. (**Note**: the script will erase your previous experiment records created by running itself.)
+The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.

 Here is an example of running all the models for 10 iterations:
 ```python
@@ -229,12 +294,12 @@ It also provides the API to run specific models at once. For more use cases, ple


 # Quant Dataset Zoo
-Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`.
+Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`:

 | Dataset                                    | US Market | China Market |
 | --                                         | --        | --           |
 | [Alpha360](./qlib/contrib/data/handler.py) |  √        |  √           |
-| [Alpha158](./qlib/contrib/data/handler.py) |  √        |  √           | 
+| [Alpha158](./qlib/contrib/data/handler.py) |  √        |  √           |

 [Here](https://qlib.readthedocs.io/en/latest/advanced/alpha.html) is a tutorial to build dataset with `Qlib`.
 Your PR to build new Quant dataset is highly welcomed.
@@ -276,13 +341,27 @@ which creates a dataset (14 features/factors) from the basic OHLCV daily data of
 * `+(-)E` indicates with (out) `ExpressionCache`
 * `+(-)D` indicates with (out) `DatasetCache`

-Most general-purpose databases take too much time on loading data. After looking into the underlying implementation, we find that data go through too many layers of interfaces and unnecessary format transformations in general-purpose database solutions.
+Most general-purpose databases take too much time to load data. After looking into the underlying implementation, we find that data go through too many layers of interfaces and unnecessary format transformations in general-purpose database solutions.
 Such overheads greatly slow down the data loading process.
 Qlib data are stored in a compact format, which is efficient to be combined into arrays for scientific computation.

+# Related Reports
+- [【华泰金工林晓明团队】图神经网络选股与Qlib实践——华泰人工智能系列之四十二](https://mp.weixin.qq.com/s/w5fDB6oAv9dO6vlhf1kmhA)
+- [Guide To Qlib: Microsoft’s AI Investment Platform](https://analyticsindiamag.com/qlib/)
+- [【华泰金工林晓明团队】微软AI量化投资平台Qlib体验——华泰人工智能系列之四十](https://mp.weixin.qq.com/s/Brcd7im4NibJOJzZfMn6tQ)
+- [微软也搞AI量化平台？还是开源的！](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ)
+- [微矿Qlib：业内首个AI量化投资开源平台](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ)

+# Contact Us
+- If you have any issues, please create an issue [here](https://github.com/microsoft/qlib/issues/new/choose) or send messages in [gitter](https://gitter.im/Microsoft/qlib).
+- If you want to make contributions to `Qlib`, please [create pull requests](https://github.com/microsoft/qlib/compare). 
+- For other reasons, you are welcome to contact us by email([qlib@microsoft.com](mailto:qlib@microsoft.com)).
+  - We are recruiting new members(both FTEs and interns), your resumes are welcome!

-
+Join IM discussion groups:
+|[Gitter](https://gitter.im/Microsoft/qlib)|
+|----|
+|![image](http://fintech.msra.cn/images_v060/qrcode/gitter_qr.png)|

 # Contributing

--- a/docs/FAQ/FAQ.rst
+++ b/docs/FAQ/FAQ.rst
@@ -70,3 +70,31 @@ If the issue is not resolved, use ``keys *`` to find if multiple keys exist. If


 Also, feel free to post a new issue in our GitHub repository. We always check each issue carefully and try our best to solve them.
+
+3. ModuleNotFoundError: No module named 'qlib.data._libs.rolling'
+------------------------------------------------------------------------------------------------------------------------------------
+
+.. code-block:: python
+
+    #### Do not import qlib package in the repository directory in case of importing qlib from . without compiling #####
+    Traceback (most recent call last):
+    File "<stdin>", line 1, in <module>
+    File "qlib/qlib/__init__.py", line 19, in init
+        from .data.cache import H
+    File "qlib/qlib/data/__init__.py", line 8, in <module>
+        from .data import (
+    File "qlib/qlib/data/data.py", line 20, in <module>
+        from .cache import H
+    File "qlib/qlib/data/cache.py", line 36, in <module>
+        from .ops import Operators
+    File "qlib/qlib/data/ops.py", line 19, in <module>
+        from ._libs.rolling import rolling_slope, rolling_rsquare, rolling_resi
+    ModuleNotFoundError: No module named 'qlib.data._libs.rolling'
+
+- If the error occurs when importing ``qlib`` package with ``PyCharm`` IDE, users can execute the following command in the project root folder to compile Cython files and generate executable files:
+
+    .. code-block:: bash
+
+        python setup.py build_ext --inplace
+
+- If the error occurs when importing ``qlib`` package with command ``python`` , users need to change the running directory to ensure that the script does not run in the project directory.
--- a/docs/_static/demo.sh
+++ b/docs/_static/demo.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+git clone https://github.com/microsoft/qlib.git
+cd qlib
+ls
+pip install pyqlib
+# or
+# pip install numpy
+# pip install --upgrade cython
+# python setup.py install
+cd examples
+ls
+qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
--- a/docs/_static/img/online_serving.png
+++ b/docs/_static/img/online_serving.png
--- a/docs/_static/img/qrcode/gitter_qr.png
+++ b/docs/_static/img/qrcode/gitter_qr.png
--- a/docs/advanced/alpha.rst
+++ b/docs/advanced/alpha.rst
@@ -50,57 +50,37 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib:

 .. code-block:: python

-    >> from qlib.data.dataset.handler import QLibDataHandler
+    >> from qlib.data.dataset.loader import QlibDataLoader
    >> MACD_EXP = '(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'
    >> fields = [MACD_EXP] # MACD
    >> names = ['MACD']
-    >> labels = ['$close'] # label
+    >> labels = ['Ref($close, -2)/Ref($close, -1) - 1'] # label
    >> label_names = ['LABEL']
-    >> data_handler = QLibDataHandler(start_date='2010-01-01', end_date='2017-12-31', fields=fields, names=names, labels=labels, label_names=label_names)
-    >> TRAINER_CONFIG = {
-    ..     "train_start_date": "2007-01-01",
-    ..     "train_end_date": "2014-12-31",
-    ..     "validate_start_date": "2015-01-01",
-    ..     "validate_end_date": "2016-12-31",
-    ..  "test_start_date": "2017-01-01",
-    ..  "test_end_date": "2020-08-01",
+    >> data_loader_config = {
+    ..     "feature": (fields, names),
+    ..     "label": (labels, label_names)
    .. }
-    >> feature_train, label_train, feature_validate, label_validate, feature_test, label_test = data_handler.get_split_data(**TRAINER_CONFIG)
-    >> print(feature_train, label_train)
-                            MACD
-    instrument  datetime            
-    SH600000    2010-01-04 -0.008625
-                2010-01-05 -0.007234
-                2010-01-06 -0.007693
-                2010-01-07 -0.009633
-                2010-01-08 -0.009891
-    ...                         ...
-    SZ300251    2014-12-25  0.043072
-                2014-12-26  0.041345
-                2014-12-29  0.042733
-                2014-12-30  0.042066
-                2014-12-31  0.036299
-
-    [322025 rows x 1 columns]    
-                            LABEL
-    instrument  datetime            
-    SH600000    2010-01-04  4.260015
-                2010-01-05  4.292182
-                2010-01-06  4.207747
-                2010-01-07  4.113258
-                2010-01-08  4.159496
-    ...                         ...
-    SZ300251    2014-12-25  4.343212
-                2014-12-26  4.470587
-                2014-12-29  4.762474
-                2014-12-30  4.369748
-                2014-12-31  4.182222
-
-    [322025 rows x 1 columns]
+    >> data_loader = QlibDataLoader(config=data_loader_config)
+    >> df = data_loader.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31')
+    >> print(df)
+                            feature     label
+                               MACD     LABEL
+    datetime   instrument                    
+    2010-01-04 SH600000   -0.011547 -0.019672
+               SH600004    0.002745 -0.014721
+               SH600006    0.010133  0.002911
+               SH600008   -0.001113  0.009818
+               SH600009    0.025878 -0.017758
+    ...                         ...       ...
+    2017-12-29 SZ300124    0.007306 -0.005074
+               SZ300136   -0.013492  0.056352
+               SZ300144   -0.000966  0.011853
+               SZ300251    0.004383  0.021739
+               SZ300315   -0.030557  0.012455

 Reference
 ===========

-To learn more about ``Data Handler``, please refer to `Data Handler <../component/data.html>`_
+To learn more about ``Data Loader``, please refer to `Data Loader <../component/data.html#data-loader>`_

 To learn more about ``Data API``, please refer to `Data API <../component/data.html>`_
--- a/docs/advanced/serial.rst
+++ b/docs/advanced/serial.rst
@@ -0,0 +1,45 @@
+.. _serial:
+
+=================================
+Serialization
+=================================
+.. currentmodule:: qlib
+
+Introduction
+===================
+``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc. into a disk and reloading them. 
+
+Serializable Class
+========================
+
+``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped into or loaded from disk in `pickle` format. 
+When users dump the state of a ``Serializable`` instance, the attributes of the instance whose name **does not** start with `_` will be saved on the disk.
+However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior.
+
+Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (default and common) and "dill" (which can dump more things such as functions; more information `here <https://pypi.org/project/dill/>`_).
+
+Example
+==========================
+``Qlib``'s serializable classes include ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclasses of ``qlib.utils.serial.Serializable``.
+Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows.
+
+.. code-block:: Python
+
+    ##=============dump dataset=============
+    dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH
+
+    ##=============reload dataset=============
+    with open("dataset.pkl", "rb") as file_dataset:
+        dataset = pickle.load(file_dataset)
+
+.. note::
+    Only state of ``DatasetH`` should be saved on the disk, such as some `mean` and `variance` used for data normalization, etc. 
+
+    After reloading the ``DatasetH``, users need to reinitialize it. It means that users can reset some states of ``DatasetH`` or ``QlibDataHandler`` such as `instruments`, `start_time`, `end_time` and `segments`, etc.,  and generate new data according to the states (data is not state and should not be saved on the disk).
+
+A more detailed example is in this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`_.
+
+
+API
+===================
+Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_.
--- a/docs/advanced/task_management.rst
+++ b/docs/advanced/task_management.rst
@@ -0,0 +1,89 @@
+.. _task_management:
+
+=================================
+Task Management
+=================================
+.. currentmodule:: qlib
+
+
+Introduction
+=============
+
+The `Workflow <../component/introduction.html>`_ part introduces how to run research workflow in a loosely-coupled way. But it can only execute one ``task`` when you use ``qrun``.
+To automatically generate and execute different tasks, ``Task Management`` provides a whole process including `Task Generating`_, `Task Storing`_, `Task Training`_ and `Task Collecting`_. 
+With this module, users can run their ``task`` automatically at different periods, in different losses, or even by different models.
+
+This whole process can be used in `Online Serving <../component/online.html>`_.
+
+An example of the entire process is shown `here <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`_.
+
+Task Generating
+===============
+A ``task`` consists of `Model`, `Dataset`, `Record`, or anything added by users. 
+The specific task template can be viewed in 
+`Task Section <../component/workflow.html#task-section>`_.
+Even though the task template is fixed, users can customize their ``TaskGen`` to generate different ``task`` by task template.
+
+Here is the base class of ``TaskGen``:
+
+.. autoclass:: qlib.workflow.task.gen.TaskGen
+    :members:
+
+``Qlib`` provides a class `RollingGen <https://github.com/microsoft/qlib/tree/main/qlib/workflow/task/gen.py>`_ to generate a list of ``task`` of the dataset in different date segments.
+This class allows users to verify the effect of data from different periods on the model in one experiment. More information is `here <../reference/api.html#TaskGen>`_.
+
+Task Storing
+===============
+To achieve higher efficiency and the possibility of cluster operation, ``Task Manager`` will store all tasks in `MongoDB <https://www.mongodb.com/>`_.
+``TaskManager`` can fetch undone tasks automatically and manage the lifecycle of a set of tasks with error handling.
+Users **MUST** finish the configuration of `MongoDB <https://www.mongodb.com/>`_ when using this module.
+
+Users need to provide the MongoDB URL and database name for using ``TaskManager`` in `initialization <../start/initialization.html#Parameters>`_ or make a statement like this.
+
+    .. code-block:: python
+
+        from qlib.config import C
+        C["mongo"] = {
+            "task_url" : "mongodb://localhost:27017/", # your MongoDB url
+            "task_db_name" : "rolling_db" # database name
+        }
+
+.. autoclass:: qlib.workflow.task.manage.TaskManager
+    :members:
+
+More information of ``Task Manager`` can be found in `here <../reference/api.html#TaskManager>`_.
+
+Task Training
+===============
+After generating and storing those ``task``, it's time to run the ``task`` which is in the *WAITING* status.
+``Qlib`` provides a method called ``run_task`` to run those ``task`` in the task pool; however, users can also customize how tasks are executed.
+An easy way to get the ``task_func`` is using ``qlib.model.trainer.task_train`` directly.
+It will run the whole workflow defined by ``task``, which includes *Model*, *Dataset*, *Record*.
+
+.. autofunction:: qlib.workflow.task.manage.run_task
+
+Meanwhile, ``Qlib`` provides a module called ``Trainer``. 
+
+.. autoclass:: qlib.model.trainer.Trainer
+    :members:
+
+``Trainer`` will train a list of tasks and return a list of model recorders.
+``Qlib`` offers two kinds of Trainer: ``TrainerR`` is the simplest way, and ``TrainerRM`` is based on ``TaskManager`` to help manage the lifecycle of tasks automatically.
+If you do not want to use ``Task Manager`` to manage tasks, then using ``TrainerR`` to train a list of tasks generated by ``TaskGen`` is enough.
+`Here <../reference/api.html#Trainer>`_ are the details about different ``Trainer``.
+
+Task Collecting
+===============
+To collect the results of ``task`` after training, ``Qlib`` provides `Collector <../reference/api.html#Collector>`_, `Group <../reference/api.html#Group>`_ and `Ensemble <../reference/api.html#Ensemble>`_ to collect the results in a readable, expandable and loosely-coupled way.
+
+`Collector <../reference/api.html#Collector>`_ can collect objects from everywhere and process them, such as merging, grouping, averaging and so on. It has 2 steps including ``collect`` (collect anything in a dict) and ``process_collect`` (process the collected dict).
+
+`Group <../reference/api.html#Group>`_ also has 2 steps including ``group`` (can group a set of objects based on `group_func` and change them to a dict) and ``reduce`` (can make a dict become an ensemble based on some rule).
+For example: {(A,B,C1): object, (A,B,C2): object} ---``group``---> {(A,B): {C1: object, C2: object}} ---``reduce``---> {(A,B): object}
+
+`Ensemble <../reference/api.html#Ensemble>`_ can merge the objects in an ensemble. 
+For example: {C1: object, C2: object} ---``Ensemble``---> object
+
+So the hierarchy is as follows: ``Collector``'s second step corresponds to ``Group``, and ``Group``'s second step corresponds to ``Ensemble``.
+
+For more information, please see `Collector <../reference/api.html#Collector>`_, `Group <../reference/api.html#Group>`_ and `Ensemble <../reference/api.html#Ensemble>`_, or the `example <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`_.
--- a/docs/component/data.rst
+++ b/docs/component/data.rst
@@ -31,7 +31,7 @@ Qlib Format Data
 We've specially designed a data structure to manage financial data, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information.
 Such data will be stored with filename suffix `.bin` (We'll call them `.bin` file, `.bin` format, or qlib format). `.bin` file is designed for scientific computing on finance data.

-``Qlib`` provides two different off-the-shelf dataset, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`_:
+``Qlib`` provides two different off-the-shelf datasets, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`_:

 ========================  =================  ================
 Dataset                   US Market          China Market
@@ -41,6 +41,7 @@ Alpha360                  √                  √
 Alpha158                  √                  √
 ========================  =================  ================

+Also, ``Qlib`` provides a high-frequency dataset. Users can run a high-frequency dataset example through this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`_.

 Qlib Format Dataset
 --------------------
@@ -48,15 +49,19 @@ Qlib Format Dataset

 .. code-block:: bash

+    # download 1d
    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

+    # download 1min
+    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min
+
 In addition to China-Stock data, ``Qlib`` also includes a US-Stock dataset, which can be downloaded with the following command:

 .. code-block:: bash

    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us

-After running the above command, users can find china-stock and us-stock data in ``Qlib`` format in the ``~/.qlib/csv_data/cn_data`` directory and ``~/.qlib/csv_data/us_data`` directory respectively.
+After running the above command, users can find china-stock and us-stock data in ``Qlib`` format in the ``~/.qlib/qlib_data/cn_data`` directory and ``~/.qlib/qlib_data/us_data`` directory respectively.

 ``Qlib`` also provides the scripts in ``scripts/data_collector`` to help users crawl the latest data on the Internet and convert it to qlib format.

@@ -67,12 +72,19 @@ Converting CSV Format into Qlib Format

 ``Qlib`` has provided the script ``scripts/dump_bin.py`` to convert **any** data in CSV format into `.bin` files (``Qlib`` format) as long as they are in the correct format.

-Users can download the demo china-stock data in CSV format as follows for reference to the CSV format.
+Besides downloading the prepared demo data, users could download demo data directly from the Collector as follows for reference to the CSV format.
+Here are some examples:

-.. code-block:: bash
+for daily data:
+  .. code-block:: bash

    python scripts/get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data

+for 1min data:
+  .. code-block:: bash
+
+    python scripts/data_collector/yahoo/collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --region CN --start 2021-05-20 --end 2021-05-23 --delay 0.1 --interval 1min --limit_nums 10
+
 Users can also provide their own data in CSV format. However, the CSV data **must satisfies** following criterions:

 - CSV file is named after a specific stock *or* the CSV file includes a column of the stock name
@@ -126,20 +138,30 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
    The arguments of `--include_fields` should correspond with the column names of CSV files. The columns names of dataset provided by ``Qlib`` should include open, close, high, low, volume and factor at least.
    
    - `open`
-        The opening price
+        The adjusted opening price
    - `close`
-        The closing price
+        The adjusted closing price
    - `high`
-        The highest price
+        The adjusted highest price
    - `low`
-        The lowest price
+        The adjusted lowest price
    - `volume`
-        The trading volume
+        The adjusted trading volume
    - `factor`
-        The Restoration factor
+        The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_

    In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended. 

+Stock Pool (Market)
+--------------------------------
+
+``Qlib`` defines `stock pool <https://github.com/microsoft/qlib/blob/main/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml#L4>`_ as stock list and their date ranges. Predefined stock pools (e.g. csi300) may be imported as follows.
+
+.. code-block:: bash
+
+    python collector.py --index_name CSI300 --qlib_dir <user qlib data dir> --method parse_instruments
+
+
 Multiple Stock Modes
 --------------------------------

@@ -158,7 +180,7 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
 - If users use ``Qlib`` in china-stock mode, china-stock data is required. Users can use ``Qlib`` in china-stock mode according to the following steps:
    - Download china-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in china-stock mode
-        Supposed that users download their Qlib format data in the directory ``~/.qlib/csv_data/cn_data``. Users only need to initialize ``Qlib`` as follows.
+        Supposed that users download their Qlib format data in the directory ``~/.qlib/qlib_data/cn_data``. Users only need to initialize ``Qlib`` as follows.
        
        .. code-block:: python

@@ -167,9 +189,9 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
        

 - If users use ``Qlib`` in US-stock mode, US-stock data is required. ``Qlib`` also provides a script to download US-stock data. Users can use ``Qlib`` in US-stock mode according to the following steps:
-    - Download china-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
+    - Download us-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in US-stock mode
-        Supposed that users prepare their Qlib format data in the directory ``~/.qlib/csv_data/us_data``. Users only need to initialize ``Qlib`` as follows.
+        Supposed that users prepare their Qlib format data in the directory ``~/.qlib/qlib_data/us_data``. Users only need to initialize ``Qlib`` as follows.
        
        .. code-block:: python

@@ -177,6 +199,11 @@ The `trade unit` defines the unit number of stocks can be used in a trade, and t
            qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)
        

+.. note::
+
+    PRs for new data source are highly welcome! Users could commit the code to crawl data as a PR like `the examples here  <https://github.com/microsoft/qlib/tree/main/scripts>`_. And then we will use the code to create data cache on our server which other users could use directly.
+
+
 Data API
 ========================

@@ -195,6 +222,7 @@ Feature
 - `ExpressionOps`
    `ExpressionOps` will use operator for feature construction.
    To know more about  ``Operator``, please refer to `Operator API <../reference/api.html#module-qlib.data.ops>`_.
+    Also, ``Qlib`` allows users to define their own custom ``Operator``; an example has been given in ``tests/test_register_ops.py``.

 To know more about  ``Feature``, please refer to `Feature API <../reference/api.html#module-qlib.data.base>`_.

@@ -212,6 +240,25 @@ Filter
    - `cross-sectional features filter` \: rule_expression = '$rank($close)<10'
    - `time-sequence features filter`: rule_expression = '$Ref($close, 3)>100'

+Here is a simple example showing how to use filter in a basic ``Qlib`` workflow configuration file:
+
+.. code-block:: yaml
+
+    filter: &filter
+        filter_type: ExpressionDFilter
+        rule_expression: "Ref($close, -2) / Ref($close, -1) > 1"
+        filter_start_time: 2010-01-01
+        filter_end_time: 2010-01-07
+        keep: False
+
+    data_handler_config: &data_handler_config
+        start_time: 2010-01-01
+        end_time: 2021-01-22
+        fit_start_time: 2010-01-01
+        fit_end_time: 2015-12-31
+        instruments: *market
+        filter_pipe: [*filter]
+
 To know more about ``Filter``, please refer to `Filter API <../reference/api.html#module-qlib.data.filter>`_.

 Reference
@@ -273,9 +320,10 @@ Here are some important interfaces that ``DataHandlerLP`` provides:
 .. autoclass:: qlib.data.dataset.handler.DataHandlerLP
    :members: __init__, fetch, get_cols

-If users want to load features and labels by config, users can inherit ``qlib.data.dataset.handler.ConfigDataHandler``, ``Qlib`` also provides some preprocess method in this subclass.

-If users want to use qlib data, `QLibDataHandler` is recommended. Users can inherit their custom class from `QLibDataHandler`, which is also a subclass of `ConfigDataHandler`.
+If users want to load features and labels by config, users can define a new handler and call the static method `parse_config_to_fields` of ``qlib.contrib.data.handler.Alpha158``.
+
+Also, users can pass ``qlib.contrib.data.processor.ConfigSectionProcessor`` that provides some preprocess methods for features defined by config into the new handler.


 Processor
@@ -295,6 +343,7 @@ The ``Processor`` module in ``Qlib`` is designed to be learnable and it is respo
 - ``RobustZScoreNorm``: `processor` that applies robust z-score normalization.
 - ``CSZScoreNorm``: `processor` that applies cross sectional z-score normalization.
 - ``CSRankNorm``: `processor` that applies cross sectional rank normalization.
+- ``CSZFillna``: `processor` that fills N/A values in a cross sectional way by the mean of the column.

 Users can also create their own `processor` by inheriting the base class of ``Processor``. Please refer to the implementation of all the processors for more information (`Processor Link <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`_). 

@@ -311,7 +360,6 @@ Qlib provides implemented data handler `Alpha158`. The following example shows h

 .. note:: Users need to initialize ``Qlib`` with `qlib.init` first, please refer to `initialization <../start/initialization.html>`_.

-
 .. code-block:: Python

    import qlib
@@ -338,6 +386,9 @@ Qlib provides implemented data handler `Alpha158`. The following example shows h
        # fetch all the features
        print(h.fetch(col_set="feature"))

+
+.. note:: In the ``Alpha158``, ``Qlib`` uses the label `Ref($close, -2)/Ref($close, -1) - 1` that means the change from T+1 to T+2, rather than `Ref($close, -1)/$close - 1`, of which the reason is that when getting the T day close price of a china stock, the stock can be bought on T+1 day and sold on T+2 day.
+
 API
 ---------

@@ -362,8 +413,7 @@ The ``DatasetH`` class is the `dataset` with `Data Handler`. Here is the most im
 API
 ---------

-To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#module-qlib.data.dataset.__init__>`_.
-
+To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#dataset>`_.


 Cache
--- a/docs/component/online.rst
+++ b/docs/component/online.rst
@@ -0,0 +1,46 @@
+.. _online:
+
+=================================
+Online Serving
+=================================
+.. currentmodule:: qlib
+
+
+Introduction
+=============
+
+.. image:: ../_static/img/online_serving.png
+    :align: center
+
+
+In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions or even do real trading based on those predictions.
+``Online Serving`` is a set of modules for online models using the latest data,
+which includes `Online Manager <#online-manager>`_, `Online Strategy <#online-strategy>`_, `Online Tool <#online-tool>`_, `Updater <#updater>`_.
+
+`Here <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`_ are several examples for reference, which demonstrate different features of ``Online Serving``.
+If you have many models or `task` needs to be managed, please consider `Task Management <../advanced/task_management.html>`_.
+The `examples <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`_ are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``.
+
+Online Manager
+==============
+
+.. automodule:: qlib.workflow.online.manager
+    :members:
+
+Online Strategy
+===============
+
+.. automodule:: qlib.workflow.online.strategy
+    :members:
+
+Online Tool
+=============
+
+.. automodule:: qlib.workflow.online.utils
+    :members:
+
+Updater
+=============
+
+.. automodule:: qlib.workflow.online.update
+    :members:
--- a/docs/component/recorder.rst
+++ b/docs/component/recorder.rst
@@ -34,8 +34,10 @@ Here is a general view of the structure of the system:
            - Recorder 2
            - ...
        - ...
-This experiment management system defines a set of interface and provided a concrete implementation based on the machine learning platform: ``MLFlow`` (`link <https://mlflow.org/>`_). 
+        
+This experiment management system defines a set of interfaces and provides a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link <https://mlflow.org/>`_).

+If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, please refer to the related documents `here <https://www.mlflow.org/docs/latest/cli.html#mlflow-ui>`_.

 Qlib Recorder
 ===================
@@ -91,8 +93,54 @@ Record Template

 The ``RecordTemp`` class is a class that enables generate experiment results such as IC and backtest in a certain format. We have provided three different `Record Template` class:

- ``SignalRecord``: This class generates the `preidction` results of the model.
+- ``SignalRecord``: This class generates the `prediction` results of the model.
 - ``SigAnaRecord``: This class generates the `IC`, `ICIR`, `Rank IC` and `Rank ICIR` of the model.
+
+Here is a simple example of what is done in ``SigAnaRecord``, which users can refer to if they want to calculate IC, Rank IC, Long-Short Return with their own prediction and label.
+
+.. code-block:: Python
+
+    from qlib.contrib.eva.alpha import calc_ic, calc_long_short_return
+
+    ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
+    long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], label.iloc[:, 0])
+
 - ``PortAnaRecord``: This class generates the results of `backtest`. The detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.

+Here is a simple example of what is done in ``PortAnaRecord``, which users can refer to if they want to do backtest based on their own prediction and label.
+
+.. code-block:: Python
+
+    from qlib.contrib.strategy.strategy import TopkDropoutStrategy
+    from qlib.contrib.evaluate import (
+        backtest as normal_backtest,
+        risk_analysis,
+    )
+
+    # backtest
+    STRATEGY_CONFIG = {
+        "topk": 50,
+        "n_drop": 5,
+    }
+    BACKTEST_CONFIG = {
+        "verbose": False,
+        "limit_threshold": 0.095,
+        "account": 100000000,
+        "benchmark": BENCHMARK,
+        "deal_price": "close",
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5,
+    }
+    
+    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
+    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
+
+    # analysis
+    analysis = dict()
+    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
+    analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])
+    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
+    print(analysis_df)
+
 For more information about the APIs, please refer to `Record Template API <../reference/api.html#module-qlib.workflow.record_temp>`_.
--- a/docs/component/report.rst
+++ b/docs/component/report.rst
@@ -101,7 +101,7 @@ Graphical Result
    - Axis Y: 
        - `ic`
            The `Pearson correlation coefficient` series between `label` and `prediction score`.
-            In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. Please refer to `Data Featrue <data.html#feature>`_ for more details.
+            In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. Please refer to `Data Feature <data.html#feature>`_ for more details.
                
        - `rank_ic`
            The `Spearman's rank correlation coefficient` series between `label` and `prediction score`.
--- a/docs/component/strategy.rst
+++ b/docs/component/strategy.rst
@@ -111,8 +111,6 @@ Usage & Example
        pred_score, strategy=strategy, **BACKTEST_CONFIG
    )

-Also, the above example has been given in ``examples/train_backtest_analyze.ipynb``.
-
 To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

 To know more about ``Intraday Trading``, please refer to `Intraday Trading: Model&Strategy Testing <backtest.html>`_.
--- a/docs/component/workflow.rst
+++ b/docs/component/workflow.rst
@@ -90,12 +90,12 @@ Below is a typical config file of ``qrun``.
                    test: [2017-01-01, 2020-08-01]
        record: 
            - class: SignalRecord
-            module_path: qlib.workflow.record_temp
-            kwargs: {}
+              module_path: qlib.workflow.record_temp
+              kwargs: {}
            - class: PortAnaRecord
+              module_path: qlib.workflow.record_temp
+              kwargs:
+                  config: *port_analysis_config
+                module_path: qlib.workflow.record_temp
+                kwargs: 
+                    config: *port_analysis_config

 After saving the config into `configuration.yaml`, users could start the workflow and test their ideas with a single command below.

@@ -103,6 +103,12 @@ After saving the config into `configuration.yaml`, users could start the workflo

    qrun configuration.yaml

+If users want to use ``qrun`` under debug mode, please use the following command:
+
+.. code-block:: bash
+
+    python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+
 .. note:: 

    `qrun` will be placed in your $PATH directory when installing ``Qlib``.
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -226,3 +226,8 @@ epub_exclude_files = ["search.html"]

 autodoc_member_order = "bysource"
 autodoc_default_flags = ["members"]
+autodoc_default_options = {
+    "members": True,
+    "member-order": "bysource",
+    "special-members": "__init__",
+}
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -42,6 +42,7 @@ Document Structure
   Intraday Trading: Model&Strategy Testing <component/backtest.rst>
   Qlib Recorder: Experiment Management <component/recorder.rst>
   Analysis: Evaluation & Results Analysis <component/report.rst>
+   Online Serving: Online Management & Strategy & Tool <component/online.rst>

 .. toctree::
   :maxdepth: 3
@@ -49,6 +50,8 @@ Document Structure
   
   Building Formulaic Alphas <advanced/alpha.rst>
   Online & Offline mode <advanced/server.rst>
+   Serialization <advanced/serial.rst>
+   Task Management <advanced/task_management.rst>

 .. toctree::
   :maxdepth: 3
--- a/docs/reference/api.rst
+++ b/docs/reference/api.rst
@@ -53,6 +53,34 @@ Cache
 .. autoclass:: qlib.data.cache.DiskDatasetCache
    :members:

+
+Storage
+-------------
+.. autoclass:: qlib.data.storage.storage.BaseStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.storage.CalendarStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.storage.InstrumentStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.storage.FeatureStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.file_storage.FileStorageMixin
+    :members:
+
+.. autoclass:: qlib.data.storage.file_storage.FileCalendarStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.file_storage.FileInstrumentStorage
+    :members:
+
+.. autoclass:: qlib.data.storage.file_storage.FileFeatureStorage
+    :members:
+
+
 Dataset
 ---------------

@@ -152,4 +180,81 @@ Recorder
 Record Template
 --------------------
 .. automodule:: qlib.workflow.record_temp
-    :members:
+    :members:
+
+Task Management
+====================
+
+
+TaskGen
+--------------------
+.. automodule:: qlib.workflow.task.gen
+    :members:
+
+TaskManager
+--------------------
+.. automodule:: qlib.workflow.task.manage
+    :members:
+
+Trainer
+--------------------
+.. automodule:: qlib.model.trainer
+    :members:
+
+Collector
+--------------------
+.. automodule:: qlib.workflow.task.collect
+    :members:
+
+Group
+--------------------
+.. automodule:: qlib.model.ens.group
+    :members:
+
+Ensemble
+--------------------
+.. automodule:: qlib.model.ens.ensemble
+    :members:
+
+Utils
+--------------------
+.. automodule:: qlib.workflow.task.utils
+    :members:
+
+
+Online Serving
+====================
+
+
+Online Manager
+--------------------
+.. automodule:: qlib.workflow.online.manager
+    :members:
+
+Online Strategy
+--------------------
+.. automodule:: qlib.workflow.online.strategy
+    :members:
+
+Online Tool
+--------------------
+.. automodule:: qlib.workflow.online.utils
+    :members:
+
+RecordUpdater
+--------------------
+.. automodule:: qlib.workflow.online.update
+    :members:
+
+
+Utils
+====================
+
+Serializable
+--------------------
+
+.. autoclass:: qlib.utils.serial.Serializable
+    :members:
+
+
+    
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1 +1,5 @@
-Cython==0.29.21
+Cython
+cmake
+numpy
+scipy
+scikit-learn
--- a/docs/start/initialization.rst
+++ b/docs/start/initialization.rst
@@ -63,6 +63,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
        If Qlib fails to connect redis via `redis_host` and `redis_port`, cache mechanism will not be used! Please refer to `Cache <../component/data.html#cache>`_ for details.
 - `exp_manager`
    Type: dict, optional parameter, the setting of `experiment manager` to be used in qlib. Users can specify an experiment manager class, as well as the tracking URI for all the experiments. However, please be aware that we only support input of a dictionary in the following style for `exp_manager`. For more information about `exp_manager`, users can refer to `Recorder: Experiment Management <../component/recorder.html>`_.
+    
    .. code-block:: Python

        # For example, if you want to set your tracking_uri to a <specific folder>, you can initialize qlib below
@@ -74,3 +75,14 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
                "default_exp_name": "Experiment",
            }
        })
+- `mongo`
+    Type: dict, optional parameter, the setting of `MongoDB <https://www.mongodb.com/>`_, which is used by some features such as `Task Management <../advanced/task_management.html>`_ to provide high performance and clustered processing.
+    Users need to finish the `installation <https://www.mongodb.com/try/download/community>`_ first and run MongoDB at a fixed URL.
+
+    .. code-block:: Python
+
+        # For example, you can initialize qlib below
+        qlib.init(provider_uri=provider_uri, region=REG_CN, mongo={
+            "task_url": "mongodb://localhost:27017/",  # your mongo url
+            "task_db_name": "rolling_db", # the database name of Task Management
+        })
--- a/docs/start/integration.rst
+++ b/docs/start/integration.rst
@@ -82,7 +82,7 @@ The Custom models need to inherit `qlib.model.base.Model <../reference/api.html#
            return pd.Series(self.model.predict(x_test.values), index=x_test.index)

 - Override the `finetune` method (Optional)
-    - This method is optional to the users, and when users one to use this method on their own models, they should inherit the ``ModelFT`` base class, which includes the interface of `finetune`.
+    - This method is optional to the users. When users want to use this method on their own models, they should inherit the ``ModelFT`` base class, which includes the interface of `finetune`.
    - The parameters must include the parameter `dataset`.
    - Code Example: In the following example, users will use `LightGBM` as the model and finetune it.
    .. code-block:: Python
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,6 +1,6 @@
 # Requirements

-Here is the minimal hardware requirements to run the example.
+Here are the minimal hardware requirements to run the `workflow_by_code` example.
 - Memory: 16G
 - Free Disk: 5G

--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
@@ -0,0 +1,93 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
@@ -54,7 +54,6 @@ task:
            batch_size: 800
            metric: loss
            loss: mse
-            seed: 0
            GPU: 0
            rnn_type: GRU
    dataset:
@@ -62,7 +61,7 @@ task:
        module_path: qlib.data.dataset
        kwargs:
            handler:
-                class: ALPHA360
+                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
@@ -0,0 +1,72 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: CatBoostModel
+        module_path: qlib.contrib.model.catboost_model
+        kwargs:
+            loss: RMSE
+            learning_rate: 0.0421
+            subsample: 0.8789
+            max_depth: 6
+            num_leaves: 100
+            thread_count: 20
+            grow_policy: Lossguide
+            bootstrap_type: Poisson
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/DoubleEnsemble/README.md
+++ b/examples/benchmarks/DoubleEnsemble/README.md
@@ -0,0 +1,4 @@
+# DoubleEnsemble
+* DoubleEnsemble is an ensemble framework that leverages learning-trajectory-based sample reweighting and shuffling-based feature selection to address both the low signal-to-noise ratio and the increasing number of features. It identifies the key samples based on the training dynamics of each sample and elicits key features based on the ablation impact of each feature via shuffling. The model is applicable to a wide range of base models and is capable of extracting complex patterns while mitigating the overfitting and instability issues in financial market prediction.
+* The code used in Qlib is our own implementation.
+* Paper: DoubleEnsemble: A New Ensemble Method Based on Sample Reweighting and Feature Selection for Financial Data Analysis [https://arxiv.org/pdf/2010.01265.pdf](https://arxiv.org/pdf/2010.01265.pdf).
--- a/examples/benchmarks/DoubleEnsemble/requirements.txt
+++ b/examples/benchmarks/DoubleEnsemble/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.1.2
+numpy==1.17.4
+lightgbm==3.1.0
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
@@ -0,0 +1,90 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DEnsembleModel
+        module_path: qlib.contrib.model.double_ensemble
+        kwargs:
+            base_model: "gbm"
+            loss: mse
+            num_models: 6
+            enable_sr: True
+            enable_fs: True
+            alpha1: 1
+            alpha2: 1
+            bins_sr: 10
+            bins_fs: 5
+            decay: 0.5
+            sample_ratios:
+                - 0.8
+                - 0.7
+                - 0.6
+                - 0.5
+                - 0.4
+            sub_weights:
+                - 1
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+            epochs: 28
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+            verbosity: -1
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
@@ -0,0 +1,97 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DEnsembleModel
+        module_path: qlib.contrib.model.double_ensemble
+        kwargs:
+            base_model: "gbm"
+            loss: mse
+            num_models: 6
+            enable_sr: True
+            enable_fs: True
+            alpha1: 1
+            alpha2: 1
+            bins_sr: 10
+            bins_fs: 5
+            decay: 0.5
+            sample_ratios:
+                - 0.8
+                - 0.7
+                - 0.6
+                - 0.5
+                - 0.4
+            sub_weights:
+                - 1
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+                - 0.2
+            epochs: 136
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+            verbosity: -1
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GATs
+        module_path: qlib.contrib.model.pytorch_gats_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.7
+            n_epochs: 200
+            lr: 1e-4
+            early_stop: 10
+            metric: loss
+            loss: mse
+            base_model: LSTM
+            with_pretrain: True
+            model_path: "benchmarks/LSTM/csi300_lstm_ts.pkl"
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
@@ -56,14 +56,13 @@ task:
            base_model: LSTM
            with_pretrain: True
            model_path: "benchmarks/LSTM/model_lstm_csi300.pkl"
-            seed: 0
            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
-                class: ALPHA360
+                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
@@ -74,6 +73,11 @@ task:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs: 
--- a/examples/benchmarks/GRU/csi300_gru_ts.pkl
+++ b/examples/benchmarks/GRU/csi300_gru_ts.pkl
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GRU
+        module_path: qlib.contrib.model.pytorch_gru_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 2e-4
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
@@ -54,14 +54,13 @@ task:
            batch_size: 800
            metric: loss
            loss: mse
-            seed: 0
            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
-                class: ALPHA360
+                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
--- a/examples/benchmarks/LSTM/csi300_lstm_ts.pkl
+++ b/examples/benchmarks/LSTM/csi300_lstm_ts.pkl
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LSTM
+        module_path: qlib.contrib.model.pytorch_lstm_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
@@ -54,14 +54,13 @@ task:
            batch_size: 800
            metric: loss
            loss: mse
-            seed: 0
            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
-                class: ALPHA360
+                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
@@ -32,7 +32,7 @@ task:
        kwargs:
            loss: mse
            colsample_bytree: 0.8879
-            learning_rate: 0.0421
+            learning_rate: 0.2
            subsample: 0.8789
            lambda_l1: 205.6999
            lambda_l2: 580.9768
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
@@ -0,0 +1,73 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml
@@ -0,0 +1,81 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    instruments: *market
+    data_loader:
+        class: QlibDataLoader
+        kwargs:
+            config:
+                feature:
+                    - ["Resi($close, 15)/$close", "Std(Abs($close/Ref($close, 1)-1)*$volume, 5)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, 5)+1e-12)", "Rsquare($close, 5)", "($high-$low)/$open", "Rsquare($close, 10)", "Corr($close, Log($volume+1), 5)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)", "Corr($close, Log($volume+1), 10)", "Rsquare($close, 20)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)", "Corr($close, Log($volume+1), 20)", "(Less($open, $close)-$low)/$open"]
+                    - ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"]
+                label:
+                    - ["Ref($close, -2)/Ref($close, -1) - 1"]
+                    - ["LABEL0"]
+            freq: day
+
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSZScoreNorm
+          kwargs:
+            fields_group: label
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: DataHandlerLP
+                module_path: qlib.data.dataset.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
+++ b/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
@@ -65,8 +65,9 @@ task:
            lr_decay_steps: 100
            optimizer: adam
            max_steps: 8000
-            batch_size: 4096
+            batch_size: 8192
            GPU: 0
+            weight_decay: 0.0002
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
@@ -0,0 +1,82 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DNNModelPytorch
+        module_path: qlib.contrib.model.pytorch_nn
+        kwargs:
+            loss: mse
+            input_dim: 360
+            output_dim: 1
+            lr: 0.002
+            lr_decay: 0.96
+            lr_decay_steps: 100
+            optimizer: adam
+            max_steps: 8000
+            batch_size: 4096
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -0,0 +1,39 @@
+# Benchmarks Performance
+
+Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` datasets with China's A-share stock & CSI300 data, respectively. The values of each metric are the mean and standard deviation calculated over 20 runs.
+
+The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
+
+## Alpha360 dataset
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
+|---|---|---|---|---|---|---|---|---|
+| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 |
+| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 |
+| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 |
+| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 |
+| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 |
+| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 |
+| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 |
+| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
+| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
+| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
+| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
+
+## Alpha158 dataset
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
+|---|---|---|---|---|---|---|---|---|
+| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 |
+| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 |
+| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
+| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
+| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
+| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
+| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
+| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
+| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
+| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
+| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
+| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
+
+- The selected 20 features are based on the feature importance of a lightgbm-based model.
+- The base model of DoubleEnsemble is LGBM.
--- a/examples/benchmarks/SFM/README.md
+++ b/examples/benchmarks/SFM/README.md
@@ -1,3 +1,3 @@
 # State-Frequency-Memory
 - State Frequency Memory (SFM) is a novel recurrent network that uses Discrete Fourier Transform to decompose the hidden states of memory cells and capture the multi-frequency trading patterns from past market data to make stock price predictions. 
- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [https://www.cs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.](https://www.cs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.)
+- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf)
--- a/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
+++ b/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
@@ -57,14 +57,13 @@ task:
            eval_steps: 5
            loss: mse
            optimizer: adam
-            GPU: 1
-            seed: 710
+            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
-                class: ALPHA360
+                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
--- a/examples/benchmarks/TFT/data_formatters/base.py
+++ b/examples/benchmarks/TFT/data_formatters/base.py
@@ -132,7 +132,7 @@ class GenericDataFormatter(abc.ABC):
        return -1, -1

    def get_column_definition(self):
-        """"Returns formatted column definition in order expected by the TFT."""
+        """Returns formatted column definition in order expected by the TFT."""

        column_definition = self._column_definition

--- a/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
+++ b/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
@@ -1,219 +1,229 @@
-# coding=utf-8
-# Copyright 2020 The Google Research Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Lint as: python3
-"""Custom formatting functions for Alpha158 dataset.
-
-Defines dataset specific column definitions and data transformations.
-"""
-
-import data_formatters.base
-import libs.utils as utils
-import sklearn.preprocessing
-
-GenericDataFormatter = data_formatters.base.GenericDataFormatter
-DataTypes = data_formatters.base.DataTypes
-InputTypes = data_formatters.base.InputTypes
-
-
-class Alpha158Formatter(GenericDataFormatter):
-    """Defines and formats data for the Alpha158 dataset.
-
-    Attributes:
-      column_definition: Defines input and data type of column used in the
-        experiment.
-      identifiers: Entity identifiers used in experiments.
-    """
-
-    _column_definition = [
-        ("instrument", DataTypes.CATEGORICAL, InputTypes.ID),
-        ("LABEL0", DataTypes.REAL_VALUED, InputTypes.TARGET),
-        ("date", DataTypes.DATE, InputTypes.TIME),
-        ("month", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
-        ("day_of_week", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
-        # Selected 10 features
-        ("RESI5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("WVMA5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("RSQR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("KLEN", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("RSQR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("CORR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("CORD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("CORR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("ROC60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("RESI10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
-        ("const", DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
-    ]
-
-    def __init__(self):
-        """Initialises formatter."""
-
-        self.identifiers = None
-        self._real_scalers = None
-        self._cat_scalers = None
-        self._target_scaler = None
-        self._num_classes_per_cat_input = None
-
-    def split_data(self, df, valid_boundary=2016, test_boundary=2018):
-        """Splits data frame into training-validation-test data frames.
-
-        This also calibrates scaling object, and transforms data for each split.
-
-        Args:
-          df: Source data frame to split.
-          valid_boundary: Starting year for validation data
-          test_boundary: Starting year for test data
-
-        Returns:
-          Tuple of transformed (train, valid, test) data.
-        """
-
-        print("Formatting train-valid-test splits.")
-
-        index = df["year"]
-        train = df.loc[index < valid_boundary]
-        valid = df.loc[(index >= valid_boundary) & (index < test_boundary)]
-        test = df.loc[index >= test_boundary]
-
-        self.set_scalers(train)
-
-        return (self.transform_inputs(data) for data in [train, valid, test])
-
-    def set_scalers(self, df):
-        """Calibrates scalers using the data supplied.
-
-        Args:
-          df: Data to use to calibrate scalers.
-        """
-        print("Setting scalers with training data...")
-
-        column_definitions = self.get_column_definition()
-        id_column = utils.get_single_col_by_input_type(InputTypes.ID, column_definitions)
-        target_column = utils.get_single_col_by_input_type(InputTypes.TARGET, column_definitions)
-
-        # Extract identifiers in case required
-        self.identifiers = list(df[id_column].unique())
-
-        # Format real scalers
-        real_inputs = utils.extract_cols_from_data_type(
-            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
-        )
-
-        data = df[real_inputs].values
-        self._real_scalers = sklearn.preprocessing.StandardScaler().fit(data)
-        self._target_scaler = sklearn.preprocessing.StandardScaler().fit(
-            df[[target_column]].values
-        )  # used for predictions
-
-        # Format categorical scalers
-        categorical_inputs = utils.extract_cols_from_data_type(
-            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
-        )
-
-        categorical_scalers = {}
-        num_classes = []
-        for col in categorical_inputs:
-            # Set all to str so that we don't have mixed integer/string columns
-            srs = df[col].apply(str)
-            categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(srs.values)
-            num_classes.append(srs.nunique())
-
-        # Set categorical scaler outputs
-        self._cat_scalers = categorical_scalers
-        self._num_classes_per_cat_input = num_classes
-
-    def transform_inputs(self, df):
-        """Performs feature transformations.
-
-        This includes both feature engineering, preprocessing and normalisation.
-
-        Args:
-          df: Data frame to transform.
-
-        Returns:
-          Transformed data frame.
-
-        """
-        output = df.copy()
-
-        if self._real_scalers is None and self._cat_scalers is None:
-            raise ValueError("Scalers have not been set!")
-
-        column_definitions = self.get_column_definition()
-
-        real_inputs = utils.extract_cols_from_data_type(
-            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
-        )
-        categorical_inputs = utils.extract_cols_from_data_type(
-            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
-        )
-
-        # Format real inputs
-        output[real_inputs] = self._real_scalers.transform(df[real_inputs].values)
-
-        # Format categorical inputs
-        for col in categorical_inputs:
-            string_df = df[col].apply(str)
-            output[col] = self._cat_scalers[col].transform(string_df)
-
-        return output
-
-    def format_predictions(self, predictions):
-        """Reverts any normalisation to give predictions in original scale.
-
-        Args:
-          predictions: Dataframe of model predictions.
-
-        Returns:
-          Data frame of unnormalised predictions.
-        """
-        output = predictions.copy()
-
-        column_names = predictions.columns
-
-        for col in column_names:
-            if col not in {"forecast_time", "identifier"}:
-                output[col] = self._target_scaler.inverse_transform(predictions[col])
-
-        return output
-
-    # Default params
-    def get_fixed_params(self):
-        """Returns fixed model parameters for experiments."""
-
-        fixed_params = {
-            "total_time_steps": 6 + 6,
-            "num_encoder_steps": 6,
-            "num_epochs": 100,
-            "early_stopping_patience": 10,
-            "multiprocessing_workers": 5,
-        }
-
-        return fixed_params
-
-    def get_default_model_params(self):
-        """Returns default optimised model parameters."""
-
-        model_params = {
-            "dropout_rate": 0.4,
-            "hidden_layer_size": 16,
-            "learning_rate": 0.0001,
-            "minibatch_size": 128,
-            "max_gradient_norm": 0.0135,
-            "num_heads": 1,
-            "stack_size": 1,
-        }
-
-        return model_params
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Custom formatting functions for Alpha158 dataset.
+
+Defines dataset specific column definitions and data transformations.
+"""
+
+import data_formatters.base
+import libs.utils as utils
+import sklearn.preprocessing
+
+GenericDataFormatter = data_formatters.base.GenericDataFormatter
+DataTypes = data_formatters.base.DataTypes
+InputTypes = data_formatters.base.InputTypes
+
+
+class Alpha158Formatter(GenericDataFormatter):
+    """Defines and formats data for the Alpha158 dataset.
+
+    Attributes:
+      column_definition: Defines input and data type of column used in the
+        experiment.
+      identifiers: Entity identifiers used in experiments.
+    """
+
+    _column_definition = [
+        ("instrument", DataTypes.CATEGORICAL, InputTypes.ID),
+        ("LABEL0", DataTypes.REAL_VALUED, InputTypes.TARGET),
+        ("date", DataTypes.DATE, InputTypes.TIME),
+        ("month", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
+        ("day_of_week", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
+        # Selected 20 features, all declared as real-valued observed inputs
+        ("RESI5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("WVMA5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("KLEN", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("ROC60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RESI10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("VSTD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("WVMA60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("STD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR20", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR20", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("KLOW", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("const", DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
+    ]
+
+    def __init__(self):
+        """Initialises formatter."""
+
+        self.identifiers = None
+        self._real_scalers = None
+        self._cat_scalers = None
+        self._target_scaler = None
+        self._num_classes_per_cat_input = None
+
+    def split_data(self, df, valid_boundary=2016, test_boundary=2018):
+        """Splits data frame into training-validation-test data frames.
+
+        This also calibrates scaling object, and transforms data for each split.
+
+        Args:
+          df: Source data frame to split.
+          valid_boundary: Starting year for validation data
+          test_boundary: Starting year for test data
+
+        Returns:
+          Tuple of transformed (train, valid, test) data.
+        """
+
+        print("Formatting train-valid-test splits.")
+
+        index = df["year"]
+        train = df.loc[index < valid_boundary]
+        valid = df.loc[(index >= valid_boundary) & (index < test_boundary)]
+        test = df.loc[index >= test_boundary]
+
+        self.set_scalers(train)
+
+        return (self.transform_inputs(data) for data in [train, valid, test])
+
+    def set_scalers(self, df):
+        """Calibrates scalers using the data supplied.
+
+        Args:
+          df: Data to use to calibrate scalers.
+        """
+        print("Setting scalers with training data...")
+
+        column_definitions = self.get_column_definition()
+        id_column = utils.get_single_col_by_input_type(InputTypes.ID, column_definitions)
+        target_column = utils.get_single_col_by_input_type(InputTypes.TARGET, column_definitions)
+
+        # Extract identifiers in case required
+        self.identifiers = list(df[id_column].unique())
+
+        # Format real scalers
+        real_inputs = utils.extract_cols_from_data_type(
+            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        data = df[real_inputs].values
+        self._real_scalers = sklearn.preprocessing.StandardScaler().fit(data)
+        self._target_scaler = sklearn.preprocessing.StandardScaler().fit(
+            df[[target_column]].values
+        )  # used for predictions
+
+        # Format categorical scalers
+        categorical_inputs = utils.extract_cols_from_data_type(
+            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        categorical_scalers = {}
+        num_classes = []
+        for col in categorical_inputs:
+            # Set all to str so that we don't have mixed integer/string columns
+            srs = df[col].apply(str)
+            categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(srs.values)
+            num_classes.append(srs.nunique())
+
+        # Set categorical scaler outputs
+        self._cat_scalers = categorical_scalers
+        self._num_classes_per_cat_input = num_classes
+
+    def transform_inputs(self, df):
+        """Performs feature transformations.
+
+        This includes both feature engineering, preprocessing and normalisation.
+
+        Args:
+          df: Data frame to transform.
+
+        Returns:
+          Transformed data frame.
+
+        """
+        output = df.copy()
+
+        if self._real_scalers is None and self._cat_scalers is None:
+            raise ValueError("Scalers have not been set!")
+
+        column_definitions = self.get_column_definition()
+
+        real_inputs = utils.extract_cols_from_data_type(
+            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+        categorical_inputs = utils.extract_cols_from_data_type(
+            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        # Format real inputs
+        output[real_inputs] = self._real_scalers.transform(df[real_inputs].values)
+
+        # Format categorical inputs
+        for col in categorical_inputs:
+            string_df = df[col].apply(str)
+            output[col] = self._cat_scalers[col].transform(string_df)
+
+        return output
+
+    def format_predictions(self, predictions):
+        """Reverts any normalisation to give predictions in original scale.
+
+        Args:
+          predictions: Dataframe of model predictions.
+
+        Returns:
+          Data frame of unnormalised predictions.
+        """
+        output = predictions.copy()
+
+        column_names = predictions.columns
+
+        for col in column_names:
+            if col not in {"forecast_time", "identifier"}:
+                output[col] = self._target_scaler.inverse_transform(predictions[col])
+
+        return output
+
+    # Default params
+    def get_fixed_params(self):
+        """Returns fixed model parameters for experiments."""
+
+        fixed_params = {
+            "total_time_steps": 6 + 6,
+            "num_encoder_steps": 6,
+            "num_epochs": 100,
+            "early_stopping_patience": 10,
+            "multiprocessing_workers": 5,
+        }
+
+        return fixed_params
+
+    def get_default_model_params(self):
+        """Returns default optimised model parameters."""
+
+        model_params = {
+            "dropout_rate": 0.4,
+            "hidden_layer_size": 160,
+            "learning_rate": 0.0001,
+            "minibatch_size": 128,
+            "max_gradient_norm": 0.0135,
+            "num_heads": 1,
+            "stack_size": 1,
+        }
+
+        return model_params
--- a/examples/benchmarks/TFT/expt_settings/configs.py
+++ b/examples/benchmarks/TFT/expt_settings/configs.py
@@ -25,7 +25,7 @@ import os
 import data_formatters.qlib_Alpha158


-class ExperimentConfig(object):
+class ExperimentConfig:
    """Defines experiment configs and paths to outputs.

    Attributes:
--- a/examples/benchmarks/TFT/libs/tft_model.py
+++ b/examples/benchmarks/TFT/libs/tft_model.py
@@ -320,7 +320,7 @@ class InterpretableMultiHeadAttention:
        return outputs, attn


-class TFTDataCache(object):
+class TFTDataCache:
    """Caches data for the TFT."""

    _data_cache = {}
@@ -348,7 +348,7 @@ class TFTDataCache(object):


 # TFT model definitions.
-class TemporalFusionTransformer(object):
+class TemporalFusionTransformer:
    """Defines Temporal Fusion Transformer.

    Attributes:
@@ -972,7 +972,7 @@ class TemporalFusionTransformer(object):
            valid_quantiles = self.quantiles
            output_size = self.output_size

-            class QuantileLossCalculator(object):
+            class QuantileLossCalculator:
                """Computes the combined quantile loss for prespecified quantiles.

                Attributes:
--- a/examples/benchmarks/TFT/tft.py
+++ b/examples/benchmarks/TFT/tft.py
@@ -1,249 +1,291 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import numpy as np
-import pandas as pd
-import tensorflow.compat.v1 as tf
-import data_formatters.base
-import expt_settings.configs
-import libs.hyperparam_opt
-import libs.tft_model
-import libs.utils as utils
-import os
-import datetime as dte
-
-
-from qlib.model.base import ModelFT
-from qlib.data.dataset import DatasetH
-from qlib.data.dataset.handler import DataHandlerLP
-
-
-# To register new datasets, please add them here.
-ALLOW_DATASET = ["Alpha158"]
-DATASET_SETTING = {
-    "Alpha158": {
-        "feature_col": ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", "ROC60", "RESI10"],
-        "label_col": ["LABEL0"],
-    },
-}
-# To register new datasets, please add their configurations here.
-
-
-def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"):
-    return data_df[[col_shift]].groupby("instrument").apply(lambda df: df.shift(shifts))
-
-
-def fill_test_na(test_df):
-    test_df_res = test_df.copy()
-    feature_cols = ~test_df_res.columns.str.contains("label", case=False)
-    test_feature_fna = test_df_res.loc[:, feature_cols].groupby("datetime").apply(lambda df: df.fillna(df.mean()))
-    test_df_res.loc[:, feature_cols] = test_feature_fna
-    return test_df_res
-
-
-def process_qlib_data(df, dataset, fillna=False):
-    """Prepare data to fit the TFT model.
-
-    Args:
-      df: Original DataFrame.
-      fillna: Whether to fill the data with the mean values.
-
-    Returns:
-      Transformed DataFrame.
-
-    """
-    # Several features selected manually
-    feature_col = DATASET_SETTING[dataset]["feature_col"]
-    label_col = DATASET_SETTING[dataset]["label_col"]
-    temp_df = df.loc[:, feature_col + label_col]
-    if fillna:
-        temp_df = fill_test_na(temp_df)
-    temp_df = temp_df.swaplevel()
-    temp_df = temp_df.sort_index()
-    temp_df = temp_df.reset_index(level=0)
-    dates = pd.to_datetime(temp_df.index)
-    temp_df["date"] = dates
-    temp_df["day_of_week"] = dates.dayofweek
-    temp_df["month"] = dates.month
-    temp_df["year"] = dates.year
-    temp_df["const"] = 1.0
-    return temp_df
-
-
-def process_predicted(df, col_name):
-    """Transform the TFT predicted data into Qlib format.
-
-    Args:
-      df: Original DataFrame.
-      fillna: New column name.
-
-    Returns:
-      Transformed DataFrame.
-
-    """
-    df_res = df.copy()
-    df_res = df_res.rename(columns={"forecast_time": "datetime", "identifier": "instrument", "t+4": col_name})
-    df_res = df_res.set_index(["datetime", "instrument"]).sort_index()
-    df_res = df_res[[col_name]]
-    return df_res
-
-
-def format_score(forecast_df, col_name="pred", label_shift=5):
-    pred = process_predicted(forecast_df, col_name=col_name)
-    pred = get_shifted_label(pred, shifts=-label_shift, col_shift=col_name)
-    pred = pred.dropna()[col_name]
-    return pred
-
-
-def transform_df(df, col_name="LABEL0"):
-    df_res = df["feature"]
-    df_res[col_name] = df["label"]
-    return df_res
-
-
-class TFTModel(ModelFT):
-    """TFT Model"""
-
-    def __init__(self, **kwargs):
-        self.model = None
-
-    def _prepare_data(self, dataset: DatasetH):
-        df_train, df_valid = dataset.prepare(
-            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
-        )
-        return transform_df(df_train), transform_df(df_valid)
-
-    def fit(
-        self,
-        dataset: DatasetH,
-        DATASET="Alpha158",
-        MODEL_FOLDER="qlib_alpha158_model",
-        LABEL_COL="LABEL0",
-        LABEL_SHIFT=5,
-        USE_GPU_ID=0,
-        **kwargs
-    ):
-
-        if DATASET not in ALLOW_DATASET:
-            raise AssertionError("The dataset is not supported, please make a new formatter to fit this dataset")
-
-        dtrain, dvalid = self._prepare_data(dataset)
-        dtrain.loc[:, LABEL_COL] = get_shifted_label(dtrain, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
-        dvalid.loc[:, LABEL_COL] = get_shifted_label(dvalid, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
-
-        train = process_qlib_data(dtrain, DATASET, fillna=True).dropna()
-        valid = process_qlib_data(dvalid, DATASET, fillna=True).dropna()
-
-        ExperimentConfig = expt_settings.configs.ExperimentConfig
-        config = ExperimentConfig(DATASET)
-        self.data_formatter = config.make_data_formatter()
-        self.model_folder = MODEL_FOLDER
-        self.gpu_id = USE_GPU_ID
-        self.label_shift = LABEL_SHIFT
-        self.expt_name = DATASET
-        self.label_col = LABEL_COL
-
-        use_gpu = (True, self.gpu_id)
-        # ===========================Training Process===========================
-        ModelClass = libs.tft_model.TemporalFusionTransformer
-        if not isinstance(self.data_formatter, data_formatters.base.GenericDataFormatter):
-            raise ValueError(
-                "Data formatters should inherit from"
-                + "AbstractDataFormatter! Type={}".format(type(self.data_formatter))
-            )
-
-        default_keras_session = tf.keras.backend.get_session()
-
-        if use_gpu[0]:
-            self.tf_config = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=use_gpu[1])
-        else:
-            self.tf_config = utils.get_default_tensorflow_config(tf_device="cpu")
-
-        self.data_formatter.set_scalers(train)
-
-        # Sets up default params
-        fixed_params = self.data_formatter.get_experiment_params()
-        params = self.data_formatter.get_default_model_params()
-
-        # Wendi: 合并调优的参数和非调优的参数
-        params = {**params, **fixed_params}
-
-        if not os.path.exists(self.model_folder):
-            os.makedirs(self.model_folder)
-        params["model_folder"] = self.model_folder
-
-        print("*** Begin training ***")
-        best_loss = np.Inf
-
-        tf.reset_default_graph()
-
-        self.tf_graph = tf.Graph()
-        with self.tf_graph.as_default():
-            self.sess = tf.Session(config=self.tf_config)
-            tf.keras.backend.set_session(self.sess)
-            self.model = ModelClass(params, use_cudnn=use_gpu[0])
-            self.sess.run(tf.global_variables_initializer())
-            self.model.fit(train_df=train, valid_df=valid)
-            print("*** Finished training ***")
-            saved_model_dir = self.model_folder + "/" + "saved_model"
-            if not os.path.exists(saved_model_dir):
-                os.makedirs(saved_model_dir)
-            self.model.save(saved_model_dir)
-
-            def extract_numerical_data(data):
-                """Strips out forecast time and identifier columns."""
-                return data[[col for col in data.columns if col not in {"forecast_time", "identifier"}]]
-
-            # p50_loss = utils.numpy_normalised_quantile_loss(
-            #    extract_numerical_data(targets), extract_numerical_data(p50_forecast),
-            #    0.5)
-            # p90_loss = utils.numpy_normalised_quantile_loss(
-            #    extract_numerical_data(targets), extract_numerical_data(p90_forecast),
-            #    0.9)
-            tf.keras.backend.set_session(default_keras_session)
-        print("Training completed.".format(dte.datetime.now()))
-        # ===========================Training Process===========================
-
-    def predict(self, dataset):
-        if self.model is None:
-            raise ValueError("model is not fitted yet!")
-        d_test = dataset.prepare("test", col_set=["feature", "label"])
-        d_test = transform_df(d_test)
-        d_test.loc[:, self.label_col] = get_shifted_label(d_test, shifts=self.label_shift, col_shift=self.label_col)
-        test = process_qlib_data(d_test, self.expt_name, fillna=True).dropna()
-
-        use_gpu = (True, self.gpu_id)
-        # ===========================Predicting Process===========================
-        default_keras_session = tf.keras.backend.get_session()
-
-        # Sets up default params
-        fixed_params = self.data_formatter.get_experiment_params()
-        params = self.data_formatter.get_default_model_params()
-        params = {**params, **fixed_params}
-
-        print("*** Begin predicting ***")
-        tf.reset_default_graph()
-
-        with self.tf_graph.as_default():
-            tf.keras.backend.set_session(self.sess)
-            output_map = self.model.predict(test, return_targets=True)
-            targets = self.data_formatter.format_predictions(output_map["targets"])
-            p50_forecast = self.data_formatter.format_predictions(output_map["p50"])
-            p90_forecast = self.data_formatter.format_predictions(output_map["p90"])
-            tf.keras.backend.set_session(default_keras_session)
-
-        predict50 = format_score(p50_forecast, "pred", 1)
-        predict90 = format_score(p90_forecast, "pred", 1)
-        predict = (predict50 + predict90) / 2  # self.label_shift
-        # ===========================Predicting Process===========================
-        return predict
-
-    def finetune(self, dataset: DatasetH):
-        """
-        finetune model
-        Parameters
-        ----------
-        dataset : DatasetH
-            dataset for finetuning
-        """
-        pass
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import tensorflow.compat.v1 as tf
+import data_formatters.base
+import expt_settings.configs
+import libs.hyperparam_opt
+import libs.tft_model
+import libs.utils as utils
+import os
+import datetime as dte
+
+
+from qlib.model.base import ModelFT
+from qlib.data.dataset import DatasetH
+from qlib.data.dataset.handler import DataHandlerLP
+
+
+# To register new datasets, please add them here.
+# TFTModel.fit rejects any dataset name that is not listed in ALLOW_DATASET.
+ALLOW_DATASET = ["Alpha158", "Alpha360"]
+# To register new datasets, please add their configurations here.
+# Each entry maps a dataset name to:
+#   "feature_col": the list of feature columns selected by process_qlib_data
+#   "label_col":   the name (a single string) of the label column
+DATASET_SETTING = {
+    "Alpha158": {
+        "feature_col": [
+            "RESI5",
+            "WVMA5",
+            "RSQR5",
+            "KLEN",
+            "RSQR10",
+            "CORR5",
+            "CORD5",
+            "CORR10",
+            "ROC60",
+            "RESI10",
+            "VSTD5",
+            "RSQR60",
+            "CORR60",
+            "WVMA60",
+            "STD5",
+            "RSQR20",
+            "CORD60",
+            "CORD10",
+            "CORR20",
+            "KLOW",
+        ],
+        "label_col": "LABEL0",
+    },
+    "Alpha360": {
+        "feature_col": [
+            "HIGH0",
+            "LOW0",
+            "OPEN0",
+            "CLOSE1",
+            "HIGH1",
+            "VOLUME1",
+            "LOW1",
+            "VOLUME3",
+            "OPEN1",
+            "VOLUME4",
+            "CLOSE2",
+            "CLOSE4",
+            "VOLUME5",
+            "LOW2",
+            "CLOSE3",
+            "VOLUME2",
+            "HIGH2",
+            "LOW4",
+            "VOLUME8",
+            "VOLUME11",
+        ],
+        "label_col": "LABEL0",
+    },
+}
+
+
+def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"):
+    """Shift one column by `shifts` rows inside every "instrument" group.
+
+    Args:
+      data_df: DataFrame that can be grouped by "instrument".
+      shifts: Number of rows passed to `DataFrame.shift` for each group.
+      col_shift: Name of the column to shift.
+
+    Returns:
+      The result of the grouped apply on the single-column frame `data_df[[col_shift]]`.
+    """
+    per_instrument = data_df[[col_shift]].groupby("instrument")
+    return per_instrument.apply(lambda group: group.shift(shifts))
+
+
+def fill_test_na(test_df):
+    """Return a copy of `test_df` whose non-label columns have NaNs filled per "datetime" group.
+
+    Columns whose name contains "label" (case-insensitive) are left untouched;
+    every other column has its NaNs replaced by that column's mean within the group.
+    """
+    filled = test_df.copy()
+    is_feature = ~filled.columns.str.contains("label", case=False)
+    by_datetime = filled.loc[:, is_feature].groupby("datetime")
+    filled.loc[:, is_feature] = by_datetime.apply(lambda group: group.fillna(group.mean()))
+    return filled
+
+
+def process_qlib_data(df, dataset, fillna=False):
+    """Prepare data to fit the TFT model.
+
+    Args:
+      df: Original DataFrame.
+      dataset: Key of DATASET_SETTING that selects the feature and label columns.
+      fillna: Whether to fill the data with the mean values (see `fill_test_na`).
+
+    Returns:
+      Transformed DataFrame with the extra columns "date", "day_of_week",
+      "month", "year" and "const".
+
+    """
+    # Only the manually selected features and the label column are kept
+    setting = DATASET_SETTING[dataset]
+    selected_cols = setting["feature_col"] + [setting["label_col"]]
+    out_df = df.loc[:, selected_cols]
+    if fillna:
+        out_df = fill_test_na(out_df)
+    # Swap the index levels, sort, then move index level 0 into a regular column
+    out_df = out_df.swaplevel().sort_index().reset_index(level=0)
+    dates = pd.to_datetime(out_df.index)
+    extra_cols = {
+        "date": dates,
+        "day_of_week": dates.dayofweek,
+        "month": dates.month,
+        "year": dates.year,
+        "const": 1.0,
+    }
+    for name, values in extra_cols.items():
+        out_df[name] = values
+    return out_df
+
+
+def process_predicted(df, col_name):
+    """Transform the TFT predicted data into Qlib format.
+
+    Args:
+      df: Original DataFrame with "forecast_time", "identifier" and "t+4" columns.
+      col_name: New name for the "t+4" column.
+
+    Returns:
+      Single-column DataFrame indexed by ("datetime", "instrument") and sorted by index.
+
+    """
+    column_map = {"forecast_time": "datetime", "identifier": "instrument", "t+4": col_name}
+    renamed = df.copy().rename(columns=column_map)
+    indexed = renamed.set_index(["datetime", "instrument"]).sort_index()
+    return indexed[[col_name]]
+
+
+def format_score(forecast_df, col_name="pred", label_shift=5):
+    """Convert a TFT forecast frame into a score Series.
+
+    The forecast is converted with `process_predicted`, shifted by `-label_shift`
+    rows per instrument with `get_shifted_label`, and rows that are NaN after the
+    shift are dropped.
+    """
+    qlib_pred = process_predicted(forecast_df, col_name=col_name)
+    shifted = get_shifted_label(qlib_pred, shifts=-label_shift, col_shift=col_name)
+    return shifted.dropna()[col_name]
+
+
+def transform_df(df, col_name="LABEL0"):
+    """Flatten a frame with "feature" and "label" column groups into one frame.
+
+    Args:
+      df: DataFrame indexable by "feature" and "label".
+      col_name: Name of the column that receives `df["label"]`.
+
+    Returns:
+      A new DataFrame holding the "feature" columns plus `col_name`.
+
+    """
+    # Copy first: assigning into the selection returned by df["feature"] can
+    # trigger pandas' SettingWithCopyWarning and may write through to `df`.
+    df_res = df["feature"].copy()
+    df_res[col_name] = df["label"]
+    return df_res
+
+
+class TFTModel(ModelFT):
+    """TFT Model
+
+    Keyword arguments given to the constructor are stored in `self.params`.
+    `fit` reads the keys "DATASET" (default "Alpha158") and "label_shift"
+    (default 5) from it.
+    """
+
+    def __init__(self, **kwargs):
+        self.model = None
+        self.params = {"DATASET": "Alpha158", "label_shift": 5}
+        self.params.update(kwargs)
+
+    def _prepare_data(self, dataset: DatasetH):
+        """Return the "train" and "valid" segments, each flattened by `transform_df`."""
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        return transform_df(df_train), transform_df(df_valid)
+
+    def fit(self, dataset: DatasetH, MODEL_FOLDER="qlib_tft_model", USE_GPU_ID=0, **kwargs):
+        """Train the TFT model and save it under `MODEL_FOLDER`/saved_model.
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset providing the "train" and "valid" segments
+        MODEL_FOLDER : str
+            folder used for the model files; created when missing
+        USE_GPU_ID : int
+            GPU id handed to `utils.get_default_tensorflow_config`
+        **kwargs
+            accepted but not used by this method
+
+        Raises
+        ------
+        AssertionError
+            if the configured dataset is not listed in ALLOW_DATASET
+        ValueError
+            if the data formatter is not a GenericDataFormatter
+        """
+        DATASET = self.params["DATASET"]
+        LABEL_SHIFT = self.params["label_shift"]
+
+        # Validate before the DATASET_SETTING lookup; otherwise an unsupported
+        # dataset fails with a bare KeyError instead of this explicit error.
+        if DATASET not in ALLOW_DATASET:
+            raise AssertionError("The dataset is not supported, please make a new formatter to fit this dataset")
+        LABEL_COL = DATASET_SETTING[DATASET]["label_col"]
+
+        dtrain, dvalid = self._prepare_data(dataset)
+        dtrain.loc[:, LABEL_COL] = get_shifted_label(dtrain, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
+        dvalid.loc[:, LABEL_COL] = get_shifted_label(dvalid, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
+
+        train = process_qlib_data(dtrain, DATASET, fillna=True).dropna()
+        valid = process_qlib_data(dvalid, DATASET, fillna=True).dropna()
+
+        ExperimentConfig = expt_settings.configs.ExperimentConfig
+        config = ExperimentConfig(DATASET)
+        # These attributes are read again by `predict`
+        self.data_formatter = config.make_data_formatter()
+        self.model_folder = MODEL_FOLDER
+        self.gpu_id = USE_GPU_ID
+        self.label_shift = LABEL_SHIFT
+        self.expt_name = DATASET
+        self.label_col = LABEL_COL
+
+        # GPU usage is currently hard-coded to True, so the CPU branch below is never taken
+        use_gpu = (True, self.gpu_id)
+        # ===========================Training Process===========================
+        ModelClass = libs.tft_model.TemporalFusionTransformer
+        if not isinstance(self.data_formatter, data_formatters.base.GenericDataFormatter):
+            raise ValueError(
+                "Data formatters should inherit from GenericDataFormatter! Type={}".format(type(self.data_formatter))
+            )
+
+        default_keras_session = tf.keras.backend.get_session()
+
+        if use_gpu[0]:
+            self.tf_config = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=use_gpu[1])
+        else:
+            self.tf_config = utils.get_default_tensorflow_config(tf_device="cpu")
+
+        self.data_formatter.set_scalers(train)
+
+        # Sets up default params
+        fixed_params = self.data_formatter.get_experiment_params()
+        params = self.data_formatter.get_default_model_params()
+
+        # Merge the default model parameters with the fixed experiment parameters;
+        # the fixed ones win on duplicate keys.
+        params = {**params, **fixed_params}
+
+        os.makedirs(self.model_folder, exist_ok=True)
+        params["model_folder"] = self.model_folder
+
+        print("*** Begin training ***")
+
+        tf.reset_default_graph()
+
+        self.tf_graph = tf.Graph()
+        with self.tf_graph.as_default():
+            self.sess = tf.Session(config=self.tf_config)
+            tf.keras.backend.set_session(self.sess)
+            try:
+                self.model = ModelClass(params, use_cudnn=use_gpu[0])
+                self.sess.run(tf.global_variables_initializer())
+                self.model.fit(train_df=train, valid_df=valid)
+                print("*** Finished training ***")
+                saved_model_dir = os.path.join(self.model_folder, "saved_model")
+                os.makedirs(saved_model_dir, exist_ok=True)
+                self.model.save(saved_model_dir)
+            finally:
+                # Restore the caller's Keras session even when training fails
+                tf.keras.backend.set_session(default_keras_session)
+        print("Training completed at {}.".format(dte.datetime.now()))
+        # ===========================Training Process===========================
+
+    def predict(self, dataset):
+        """Predict on the "test" segment of `dataset`.
+
+        Returns the average of the formatted p50 and p90 forecasts.
+
+        Raises
+        ------
+        ValueError
+            if the model has not been fitted yet
+        """
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        d_test = dataset.prepare("test", col_set=["feature", "label"])
+        d_test = transform_df(d_test)
+        d_test.loc[:, self.label_col] = get_shifted_label(d_test, shifts=self.label_shift, col_shift=self.label_col)
+        test = process_qlib_data(d_test, self.expt_name, fillna=True).dropna()
+
+        # ===========================Predicting Process===========================
+        default_keras_session = tf.keras.backend.get_session()
+
+        # Sets up default params
+        # NOTE(review): `params` and `targets` below are not used afterwards; the calls
+        # are kept in case the formatter methods validate or have side effects - confirm.
+        fixed_params = self.data_formatter.get_experiment_params()
+        params = self.data_formatter.get_default_model_params()
+        params = {**params, **fixed_params}
+
+        print("*** Begin predicting ***")
+        tf.reset_default_graph()
+
+        with self.tf_graph.as_default():
+            tf.keras.backend.set_session(self.sess)
+            try:
+                output_map = self.model.predict(test, return_targets=True)
+                targets = self.data_formatter.format_predictions(output_map["targets"])
+                p50_forecast = self.data_formatter.format_predictions(output_map["p50"])
+                p90_forecast = self.data_formatter.format_predictions(output_map["p90"])
+            finally:
+                # Restore the caller's Keras session even when prediction fails
+                tf.keras.backend.set_session(default_keras_session)
+
+        predict50 = format_score(p50_forecast, "pred", 1)
+        predict90 = format_score(p90_forecast, "pred", 1)
+        predict = (predict50 + predict90) / 2  # self.label_shift
+        # ===========================Predicting Process===========================
+        return predict
+
+    def finetune(self, dataset: DatasetH):
+        """
+        finetune model (not implemented yet; this method currently does nothing)
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset for finetuning
+        """
+        pass
--- a/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
+++ b/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
--- a/examples/benchmarks/TabNet/requirements.txt
+++ b/examples/benchmarks/TabNet/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
@@ -0,0 +1,75 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+# Handler settings; reused by task.dataset through the *data_handler_config alias.
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+# Strategy and backtest settings; reused by PortAnaRecord through the *port_analysis_config alias.
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: false
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: TabnetModel
+        module_path: qlib.contrib.model.pytorch_tabnet
+        kwargs:
+            # NOTE(review): presumably has to match the number of features produced by the Alpha158 handler - confirm
+            d_feat: 158
+            pretrain: true
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                # pretrain / pretrain_validation use the same date ranges as train / valid
+                pretrain: [2008-01-01, 2014-12-31]
+                pretrain_validation: [2015-01-01, 2016-12-31]
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: false
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
@@ -0,0 +1,75 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+# Handler settings; reused by task.dataset through the *data_handler_config alias.
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+# Strategy and backtest settings; reused by PortAnaRecord through the *port_analysis_config alias.
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: false
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: TabnetModel
+        module_path: qlib.contrib.model.pytorch_tabnet
+        kwargs:
+            # NOTE(review): presumably has to match the number of features produced by the Alpha360 handler - confirm
+            d_feat: 360
+            pretrain: true
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                # pretrain / pretrain_validation use the same date ranges as train / valid
+                pretrain: [2008-01-01, 2014-12-31]
+                pretrain_validation: [2015-01-01, 2016-12-31]
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: false
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml
+++ b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml
--- a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml
+++ b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml
@@ -0,0 +1,71 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+# Handler settings; reused by task.dataset through the *data_handler_config alias.
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+# Strategy and backtest settings; reused by PortAnaRecord through the *port_analysis_config alias.
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: false
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: XGBModel
+        module_path: qlib.contrib.model.xgboost
+        kwargs:
+            eval_metric: rmse
+            colsample_bytree: 0.8879
+            eta: 0.0421
+            max_depth: 8
+            n_estimators: 647
+            subsample: 0.8789
+            nthread: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: false
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/data/monitor.py
+++ b/examples/data/monitor.py
@@ -0,0 +1,208 @@
+"""
+This script demonstrates the implementation of the Metric Extractor and Detector
+
+NOTE: Many details are not considered in this script, for example:
+- Corner cases that will raise errors (std == 0)
+
+
+
+The following functions are used to demonstrate the following examples
+
+
+· Metric Extractor:
+	case 1) Basic statistics on different slices of the DataFrame df:
+		1) The statistics include:
+			· STD, Mean, Skewness, Kurtosis
+		2) The above statistics can be calculated on the following data slices:
+			· df.groupby(['datetime'])
+			· df.groupby(['datetime', 'industry' ])
+                3) The statistics could be calculated on the time dimension for each instrument and factor (the factor can be represented by an expression)
+			· <df implemented by expression>.groupby(['instrument', 'factor'])
+	case 2) Advanced statistics on different slices of the DataFrame df:
+		1) Auto-correlation:
+			· Calculate corr(df.loc[t, :, :], df.loc[t-w, :, :]), w=1, 2, ….
+		2) Correlation between factors:
+			· For any pair of factors (i, j): calculate corr(df.loc[t, :, i], df.loc[t, :,  j]). The result is a correlation matrix with each element corresponds to a correlation value between a pair of factors.
+
+· Detector:  detect the abnormality of the extracted metric;
+	a) Algorithms:
+		§ Basic checks:  NaN.
+		§ Point anomaly detection.
+		§ Segment anomaly detection.
+	b) Scenarios:
+		§ Online anomaly detection: monitoring streaming data.
+The usage of the detectors is demonstrated in the `case_1_*` and `case_2_*` functions
+
+
+case 3): Examples to use MetricExt to monitor IC and rank IC
+        1) IC(Information Coefficient)  #case_3_1
+        2) RankIC   #case_3_2
+"""
+
+# AUTO download data
+from typing import List, Union
+from qlib.utils import exists_qlib_data
+from qlib.tests.data import GetData
+from qlib.config import REG_CN
+
+provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+if not exists_qlib_data(provider_uri):
+    print(f"Qlib data is not found in {provider_uri}")
+    GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+
+import qlib
+import pandas as pd
+from qlib.contrib.data.handler import Alpha158
+from qlib.data.dataset.loader import QlibDataLoader
+from qlib.data.monitor.metric import format_conv
+from qlib.data.monitor.metric import MeanM, SkewM, KurtM, StdM, AutoCM, CorrM
+from qlib.data.monitor.detector import NDDetector, SWNDD, ThresholdD
+from qlib.data import D
+import fire
+
+UNIVERSE = "csi300"
+START_TIME = "20200101"
+
+# ------------------ a helper function to get data to demonstrate the functionality --------------------
+
+
+def get_data_df(col_idx: Union[int, List[int]] = 0, verbose: bool = True):
+    """
+    Helper that fetches Alpha158 data and converts the selected column(s) for the demos.
+
+    Parameters
+    ----------
+    col_idx : Union[int, List[int]]
+        column index of the metrics
+    verbose : bool
+        whether to print the head of the raw data, the selected column name(s)
+        and the converted frame
+    """
+    handler = Alpha158(instruments=UNIVERSE, infer_processors=[], learn_processors=[], start_time=START_TIME)
+    raw_df = handler.fetch()
+
+    if verbose:
+        print(raw_df.head())
+
+    # The dataframe has no industries, so fake ones are generated from the
+    # first two characters of each instrument code
+    instruments = raw_df.index.get_level_values("instrument")
+    industry = pd.Series(instruments.str.slice(stop=2).to_list(), index=raw_df.index)
+
+    # select a factor
+    selected = raw_df.iloc[:, col_idx]
+    factor_df = format_conv(selected, industry=industry)
+    if verbose:
+        print(f"Selected metric: {raw_df.columns[col_idx]}")
+        print(factor_df)
+    return factor_df
+
+
+def get_target(horizon=5):
+    """Load the target expression for UNIVERSE from START_TIME and convert it with `format_conv`."""
+    # There are lots of targets: return is one of them
+    target_expr = f"Ref($close, -{horizon + 1})/Ref($close, -1) - 1"
+    loader = QlibDataLoader(config=([target_expr], ["target"]))
+    # Aligning with factor will improve performance
+    loaded = loader.load(instruments=UNIVERSE, start_time=START_TIME)
+    return format_conv(loaded["target"])
+
+
+# -----------------  Cases to demonstrate the usage of detector and examples ----------------------
+
+
+def case_1_1():
+    """Case 1.1: extract a MeanM metric (df.groupby(["datetime"])) and check it with NDDetector."""
+    factor_df = get_data_df()
+
+    # 1) Extract metrics
+    mean_metric = MeanM().extract(factor_df)
+    print(mean_metric)
+
+    # 2) Fit the detector on historical data, then detect on new or historical data
+    detector = NDDetector()
+    detector.fit(mean_metric)
+    flags = detector.check(mean_metric)
+    print(flags)
+    print(flags.value_counts())
+
+
+def case_1_2():
+    """Case 1.2: extract MeanM with an extra "industry" group and check every column with NDDetector."""
+    factor_df = get_data_df()
+    # 1.2) df.groupby("datetime", "industry")
+    mtrc = MeanM(group=["industry"])
+    m_multi = mtrc.extract(factor_df)
+    print(m_multi)
+
+    # `DataFrame.iteritems` is deprecated and removed in pandas 2.0; `items` is the equivalent
+    for col_name, s in m_multi.items():
+        print(col_name)
+        ndd = NDDetector()
+        ndd.fit(s)  # use historical data to fit detector
+        check_res = ndd.check(s)
+        print(check_res)  #  detecting on new data or historical data
+        print(check_res.value_counts())
+
+
+def case_1_3():
+    """Case 1.3: check the "return" expression of SH600519 with the SWNDD detector (window=20)."""
+    loader = QlibDataLoader(config=(["$close/Ref($close, 1) - 1"], ["return"]))
+    loaded = loader.load(instruments=["SH600519"], start_time=START_TIME)
+    series = format_conv(loaded).iloc[:, 0]
+    print(series)
+
+    detector = SWNDD(window=20)
+    # fit uses historical data (TODO: updating will be supported in the future)
+    detector.fit(series)
+    flags = detector.check(series)
+    print(flags)
+    print(flags.value_counts())
+    # show only the entries that were flagged
+    print(flags[flags])
+
+
+def case_2_1():
+    """Case 2.1: extract the AutoCM metric and check it with ThresholdD(0.0, reverse=True).
+
+    · Calculate corr(df.loc[t, :, :], df.loc[t-w, :, :]), w=1, 2, ….
+    """
+    auto_corr = AutoCM().extract(get_data_df())
+    print(auto_corr)
+
+    detector = ThresholdD(0.0, reverse=True)
+    flags = detector.check(auto_corr)
+
+    print(flags)
+    print(flags.value_counts())
+
+
+def case_2_2():
+    """Case 2.2: extract the CorrM metric between columns 0 and 1 and check it with ThresholdD(0.0, reverse=True)."""
+    first_factor = get_data_df(0)
+    second_factor = get_data_df(1)
+
+    corr = CorrM().extract(first_factor, second_factor)
+    print(corr)
+
+    detector = ThresholdD(0.0, reverse=True)
+    flags = detector.check(corr)
+
+    print(flags)
+    print(flags.value_counts())
+
+
+def case_3_1_3_2():
+    """Cases 3.1 and 3.2: print IC (CorrM) and rank IC (CorrM with mode="spearman") of factor 0 against the target."""
+    target = get_target()
+    factor = get_data_df(0)
+
+    ic_extractor = CorrM()
+    rank_ic_extractor = CorrM(mode="spearman")
+    ic = ic_extractor.extract(factor, target)
+    rank_ic = rank_ic_extractor.extract(factor, target)
+
+    print(pd.DataFrame({"ic": ic, "rank_ic": rank_ic}))
+
+
+def run(test_list=("case_1_1", "case_1_2", "case_1_3", "case_2_1", "case_2_2", "case_3_1_3_2")):
+    """
+    run the specific tests
+
+    python monitor.py case_3_1_3_2
+
+    Parameters
+    ----------
+    test_list :  str or sequence of str
+        Names of the `case_*` functions of this module to run, in order.
+        A single name may be given as a plain string.
+
+    Raises
+    ------
+    KeyError
+        If a name does not refer to a callable `case_*` function of this module.
+    """
+    if isinstance(test_list, str):
+        test_list = [test_list]
+    for fn in test_list:
+        case = globals().get(fn)
+        # Only dispatch to the demo cases, so that no other global can be invoked by name
+        if not (isinstance(fn, str) and fn.startswith("case_") and callable(case)):
+            raise KeyError(f"Unknown test case: {fn!r}")
+        case()
+
+
+if __name__ == "__main__":
+    # Initialise qlib (called without arguments), then expose `run` on the command line through fire
+    qlib.init()
+    fire.Fire(run)
--- a/examples/data/monitor_analyser_demo.ipynb
+++ b/examples/data/monitor_analyser_demo.ipynb
@@ -0,0 +1,130 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e62a81e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tqdm.auto import tqdm\n",
+    "%matplotlib inline\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c503217b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from qlib.data.monitor.analyser import Analyser\n",
+    "import qlib\n",
+    "qlib.init()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c276470",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SimpleDFA(Analyser):\n",
+    "    \"\"\"Simple (D)ata(F)rame (A)nalyser\"\"\"\n",
+    "    def analyse(self, data: pd.DataFrame, *args, **kwargs):\n",
+    "        data.plot(*args, **kwargs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "110262e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from monitor import get_data_df, AutoCM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ea38c62",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get data\n",
+    "factor_df = get_data_df([1], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbded6fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# metric extractor\n",
+    "acm = AutoCM()\n",
+    "mtrc = acm.extract(factor_df)\n",
+    "print(mtrc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65517c81",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Analyser\n",
+    "sa = SimpleDFA()\n",
+    "sa.analyse(mtrc, title='Auto Correlation')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dab6fb2e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/examples/highfreq/README.md
+++ b/examples/highfreq/README.md
@@ -0,0 +1,35 @@
+# High-Frequency Dataset
+
+This dataset is an example for RL high frequency trading.
+
+## Get High-Frequency Data
+
+Get high-frequency data by running the following command:
+```bash
+    python workflow.py get_data
+```
+
+## Dump & Reload & Reinitialize the Dataset
+
+
+The High-Frequency Dataset is implemented as `qlib.data.dataset.DatasetH` in `workflow.py`. `DatasetH` is a subclass of [`qlib.utils.serial.Serializable`](https://qlib.readthedocs.io/en/latest/advanced/serial.html), whose state can be dumped to or loaded from disk in `pickle` format.
+
+### About Reinitialization
+
+After reloading `Dataset` from disk, `Qlib` also supports reinitializing the dataset. This means that users can reset some states of `Dataset` or `DataHandler`, such as `instruments`, `start_time`, `end_time`, `segments`, etc., and generate new data according to those states.
+
+The example is given in `workflow.py`, users can run the code as follows.
+
+### Run the Code
+
+Run the example by running the following command:
+```bash
+    python workflow.py dump_and_load_dataset
+```
+
+## Benchmarks Performance
+### Signal Test
+Here are the results of the signal test for benchmark models. We will keep updating the benchmark models in the future.
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long Precision | Short Precision | Long-Short Average Return | Long-Short Average Sharpe |
+|---|---|---|---|---|---|---|---|---|---|
+| LightGBM | Alpha158 | 0.3042±0.00 | 1.5372±0.00| 0.3117±0.00 | 1.6258±0.00 | 0.6720±0.00 | 0.6870±0.00 | 0.000769±0.00 | 1.0190±0.00 |
--- a/examples/highfreq/highfreq_handler.py
+++ b/examples/highfreq/highfreq_handler.py
@@ -0,0 +1,174 @@
+from qlib.data.dataset.handler import DataHandler, DataHandlerLP
+from qlib.data.dataset.processor import Processor
+from qlib.utils import get_cls_kwargs
+from qlib.log import TimeInspector
+
+
+class HighFreqHandler(DataHandlerLP):
+    def __init__(
+        self,
+        instruments="csi300",
+        start_time=None,
+        end_time=None,
+        infer_processors=[],
+        learn_processors=[],
+        fit_start_time=None,
+        fit_end_time=None,
+        drop_raw=True,
+    ):
+        def check_transform_proc(proc_l):
+            new_l = []
+            for p in proc_l:
+                p["kwargs"].update(
+                    {
+                        "fit_start_time": fit_start_time,
+                        "fit_end_time": fit_end_time,
+                    }
+                )
+                new_l.append(p)
+            return new_l
+
+        infer_processors = check_transform_proc(infer_processors)
+        learn_processors = check_transform_proc(learn_processors)
+
+        data_loader = {
+            "class": "QlibDataLoader",
+            "kwargs": {
+                "config": self.get_feature_config(),
+                "swap_level": False,
+                "freq": "1min",
+            },
+        }
+        super().__init__(
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
+            data_loader=data_loader,
+            infer_processors=infer_processors,
+            learn_processors=learn_processors,
+            drop_raw=drop_raw,
+        )
+
+    def get_feature_config(self):
+        fields = []
+        names = []
+
+        template_if = "If(IsNull({1}), {0}, {1})"
+        template_paused = "Select(Or(IsNull($paused), Eq($paused, 0.0)), {0})"
+        template_fillnan = "BFillNan(FFillNan({0}))"
+        # Because there is no vwap field in the yahoo data, a method similar to Simpson integration is used to approximate vwap
+        simpson_vwap = "($open + 2*$high + 2*$low + $close)/6"
+
+        def get_normalized_price_feature(price_field, shift=0):
+            """Get normalized price feature ops"""
+            if shift == 0:
+                template_norm = "Cut({0}/Ref(DayLast({1}), 240), 240, None)"
+            else:
+                template_norm = "Cut(Ref({0}, " + str(shift) + ")/Ref(DayLast({1}), 240), 240, None)"
+
+            feature_ops = template_norm.format(
+                template_if.format(
+                    template_fillnan.format(template_paused.format("$close")),
+                    template_paused.format(price_field),
+                ),
+                template_fillnan.format(template_paused.format("$close")),
+            )
+            return feature_ops
+
+        fields += [get_normalized_price_feature("$open", 0)]
+        fields += [get_normalized_price_feature("$high", 0)]
+        fields += [get_normalized_price_feature("$low", 0)]
+        fields += [get_normalized_price_feature("$close", 0)]
+        fields += [get_normalized_price_feature(simpson_vwap, 0)]
+        names += ["$open", "$high", "$low", "$close", "$vwap"]
+
+        fields += [get_normalized_price_feature("$open", 240)]
+        fields += [get_normalized_price_feature("$high", 240)]
+        fields += [get_normalized_price_feature("$low", 240)]
+        fields += [get_normalized_price_feature("$close", 240)]
+        fields += [get_normalized_price_feature(simpson_vwap, 240)]
+        names += ["$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1"]
+
+        fields += [
+            "Cut({0}/Ref(DayLast(Mean({0}, 7200)), 240), 240, None)".format(
+                "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format(
+                    template_paused.format("$volume"),
+                    template_paused.format(simpson_vwap),
+                    template_paused.format("$low"),
+                    template_paused.format("$high"),
+                )
+            )
+        ]
+        names += ["$volume"]
+        fields += [
+            "Cut(Ref({0}, 240)/Ref(DayLast(Mean({0}, 7200)), 240), 240, None)".format(
+                "If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0}))".format(
+                    template_paused.format("$volume"),
+                    template_paused.format(simpson_vwap),
+                    template_paused.format("$low"),
+                    template_paused.format("$high"),
+                )
+            )
+        ]
+        names += ["$volume_1"]
+
+        fields += ["Cut({0}, 240, None)".format(template_paused.format("Date($close)"))]
+        names += ["date"]
+        return fields, names
+
+
+class HighFreqBacktestHandler(DataHandler):
+    def __init__(
+        self,
+        instruments="csi300",
+        start_time=None,
+        end_time=None,
+    ):
+        data_loader = {
+            "class": "QlibDataLoader",
+            "kwargs": {
+                "config": self.get_feature_config(),
+                "swap_level": False,
+                "freq": "1min",
+            },
+        }
+        super().__init__(
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
+            data_loader=data_loader,
+        )
+
+    def get_feature_config(self):
+        fields = []
+        names = []
+
+        template_if = "If(IsNull({1}), {0}, {1})"
+        template_paused = "Select(Or(IsNull($paused), Eq($paused, 0.0)), {0})"
+        template_fillnan = "BFillNan(FFillNan({0}))"
+        # Because there is no vwap field in the yahoo data, a method similar to Simpson integration is used to approximate vwap
+        simpson_vwap = "($open + 2*$high + 2*$low + $close)/6"
+        fields += [
+            "Cut({0}, 240, None)".format(template_fillnan.format(template_paused.format("$close"))),
+        ]
+        names += ["$close0"]
+        fields += [
+            "Cut({0}, 240, None)".format(
+                template_if.format(
+                    template_fillnan.format(template_paused.format("$close")),
+                    template_paused.format(simpson_vwap),
+                )
+            )
+        ]
+        names += ["$vwap0"]
+        fields += [
+            "Cut(If(IsNull({0}), 0, If(Or(Gt({1}, Mul(1.001, {3})), Lt({1}, Mul(0.999, {2}))), 0, {0})), 240, None)".format(
+                template_paused.format("$volume"),
+                template_paused.format(simpson_vwap),
+                template_paused.format("$low"),
+                template_paused.format("$high"),
+            )
+        ]
+        names += ["$volume0"]
+
+        return fields, names
--- a/examples/highfreq/highfreq_ops.py
+++ b/examples/highfreq/highfreq_ops.py
@@ -0,0 +1,190 @@
+import numpy as np
+import pandas as pd
+import importlib
+from qlib.data.ops import ElemOperator, PairOperator
+from qlib.config import C
+from qlib.data.cache import H
+from qlib.data.data import Cal
+
+
+def get_calendar_day(freq="day", future=False):
+    """Load High-Freq Calendar Date Using Memcache.
+
+    Parameters
+    ----------
+    freq : str
+        frequency of read calendar file.
+    future : bool
+        whether including future trading day.
+
+    Returns
+    -------
+    _calendar:
+        array of date.
+    """
+    flag = f"{freq}_future_{future}_day"
+    if flag in H["c"]:
+        _calendar = H["c"][flag]
+    else:
+        _calendar = np.array(list(map(lambda x: x.date(), Cal.load_calendar(freq, future))))
+        H["c"][flag] = _calendar
+    return _calendar
+
+
+class DayLast(ElemOperator):
+    """DayLast Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+
+    Returns
+    ----------
+    feature:
+        a series of that each value equals the last value of its day
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        _calendar = get_calendar_day(freq=freq)
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return series.groupby(_calendar[series.index]).transform("last")
+
+
+class FFillNan(ElemOperator):
+    """FFillNan Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+
+    Returns
+    ----------
+    feature:
+        a forward fill nan feature
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return series.fillna(method="ffill")
+
+
+class BFillNan(ElemOperator):
+    """BFillNan Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+
+    Returns
+    ----------
+    feature:
+        a backfoward fill nan feature
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return series.fillna(method="bfill")
+
+
+class Date(ElemOperator):
+    """Date Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+
+    Returns
+    ----------
+    feature:
+        a series of that each value is the date corresponding to feature.index
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        _calendar = get_calendar_day(freq=freq)
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return pd.Series(_calendar[series.index], index=series.index)
+
+
+class Select(PairOperator):
+    """Select Operator
+
+    Parameters
+    ----------
+    feature_left : Expression
+        feature instance, select condition
+    feature_right : Expression
+        feature instance, select value
+
+    Returns
+    ----------
+    feature:
+        value(feature_right) that meets the condition(feature_left)
+
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series_condition = self.feature_left.load(instrument, start_index, end_index, freq)
+        series_feature = self.feature_right.load(instrument, start_index, end_index, freq)
+        return series_feature.loc[series_condition]
+
+
+class IsNull(ElemOperator):
+    """IsNull Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+
+    Returns
+    ----------
+    feature:
+        A series indicating whether the feature is nan
+    """
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return series.isnull()
+
+
+class Cut(ElemOperator):
+    """Cut Operator
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+    l : int
+        l > 0, delete the first l elements of feature (default is None, which means 0)
+    r : int
+        r < 0, delete the last -r elements of feature (default is None, which means 0)
+    Returns
+    ----------
+    feature:
+        A series with the first l and last -r elements deleted from the feature.
+        Note: It is deleted from the raw data, not the sliced data
+    """
+
+    def __init__(self, feature, l=None, r=None):
+        self.l = l
+        self.r = r
+        if (self.l is not None and self.l <= 0) or (self.r is not None and self.r >= 0):
+            raise ValueError("Cut operator l shoud > 0 and r should < 0")
+
+        super(Cut, self).__init__(feature)
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        return series.iloc[self.l : self.r]
+
+    def get_extended_window_size(self):
+        ll = 0 if self.l is None else self.l
+        rr = 0 if self.r is None else abs(self.r)
+        lft_etd, rght_etd = self.feature.get_extended_window_size()
+        lft_etd = lft_etd + ll
+        rght_etd = rght_etd + rr
+        return lft_etd, rght_etd
--- a/examples/highfreq/highfreq_processor.py
+++ b/examples/highfreq/highfreq_processor.py
@@ -0,0 +1,72 @@
+import numpy as np
+import pandas as pd
+from qlib.data.dataset.processor import Processor
+from qlib.data.dataset.utils import fetch_df_by_index
+
+
+class HighFreqNorm(Processor):
+    """Robust normalisation of the high-frequency feature table, followed by a per-day reshape.
+
+    Columns 0-9 are treated as one "price" group and columns 10-11 as one "volume"
+    group. Each group is normalised with a single median and a single scaled
+    median-absolute-deviation computed in :meth:`fit`.
+    """
+
+    def __init__(self, fit_start_time, fit_end_time):
+        # Time range whose rows are used to estimate the statistics in `fit`
+        self.fit_start_time = fit_start_time
+        self.fit_end_time = fit_end_time
+
+    def fit(self, df_features):
+        """Estimate per-group median, scale, and max/min of the normalised values.
+
+        Only the rows returned by ``fetch_df_by_index`` for the fit time slice on the
+        "datetime" level are used.
+        """
+        fetch_df = fetch_df_by_index(df_features, slice(self.fit_start_time, self.fit_end_time), level="datetime")
+        # Drop the local reference to the full frame; only the fitted slice is needed below
+        del df_features
+        df_values = fetch_df.values
+        # Column positions of each feature group
+        names = {
+            "price": slice(0, 10),
+            "volume": slice(10, 12),
+        }
+        self.feature_med = {}
+        self.feature_std = {}
+        self.feature_vmax = {}
+        self.feature_vmin = {}
+        for name, name_val in names.items():
+            part_values = df_values[:, name_val].astype(np.float32)
+            if name == "volume":
+                # Volume is normalised in log1p space
+                part_values = np.log1p(part_values)
+            self.feature_med[name] = np.nanmedian(part_values)
+            part_values = part_values - self.feature_med[name]
+            # Median absolute deviation scaled by 1.4826; the 1e-12 keeps the divisor non-zero
+            self.feature_std[name] = np.nanmedian(np.absolute(part_values)) * 1.4826 + 1e-12
+            part_values = part_values / self.feature_std[name]
+            # Extremes of the normalised values; used in `__call__` to rescale the tails
+            self.feature_vmax[name] = np.nanmax(part_values)
+            self.feature_vmin[name] = np.nanmin(part_values)
+
+    def __call__(self, df_features):
+        """Normalise ``df_features`` with the fitted statistics and return a reshaped frame.
+
+        NOTE: the "date" column of the passed frame is moved into its index in place.
+        """
+        df_features.set_index("date", append=True, drop=True, inplace=True)
+        df_values = df_features.values
+        names = {
+            "price": slice(0, 10),
+            "volume": slice(10, 12),
+        }
+
+        for name, name_val in names.items():
+            if name == "volume":
+                df_values[:, name_val] = np.log1p(df_values[:, name_val])
+            df_values[:, name_val] -= self.feature_med[name]
+            df_values[:, name_val] /= self.feature_std[name]
+            # All four masks are taken before any tail value is rewritten
+            slice0 = df_values[:, name_val] > 3.0
+            slice1 = df_values[:, name_val] > 3.5
+            slice2 = df_values[:, name_val] < -3.0
+            slice3 = df_values[:, name_val] < -3.5
+
+            # Values above 3.0 are rescaled relative to the fitted maximum ...
+            df_values[:, name_val][slice0] = (
+                3.0 + (df_values[:, name_val][slice0] - 3.0) / (self.feature_vmax[name] - 3) * 0.5
+            )
+            # ... and those that were above 3.5 before rescaling are set to exactly 3.5
+            df_values[:, name_val][slice1] = 3.5
+            # Same treatment for the negative tail, relative to the fitted minimum
+            df_values[:, name_val][slice2] = (
+                -3.0 - (df_values[:, name_val][slice2] + 3.0) / (self.feature_vmin[name] + 3) * 0.5
+            )
+            df_values[:, name_val][slice3] = -3.5
+        # Remaining index levels after dropping "datetime" are renamed to (instrument, datetime)
+        idx = df_features.index.droplevel("datetime").drop_duplicates()
+        idx.set_names(["instrument", "datetime"], inplace=True)
+
+        # Reshape is specifically for adapting to RL high-freq executor
+        # NOTE(review): the reshape assumes 240 consecutive rows per output row — confirm against the loader
+        feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240)
+        feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240)
+        df_new_features = pd.DataFrame(
+            data=np.concatenate((feat, feat_1), axis=1),
+            index=idx,
+            columns=["FEATURE_%d" % i for i in range(12 * 240)],
+        ).sort_index()
+        return df_new_features
--- a/examples/highfreq/workflow.py
+++ b/examples/highfreq/workflow.py
@@ -0,0 +1,175 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+import fire
+
+import qlib
+import pickle
+from qlib.config import REG_CN, HIGH_FREQ_CONFIG
+
+from qlib.utils import init_instance_by_config
+from qlib.data.dataset.handler import DataHandlerLP
+from qlib.data.ops import Operators
+from qlib.data.data import Cal
+from qlib.tests.data import GetData
+
+from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut
+
+
+class HighfreqWorkflow:
+    """Command-line workflow (exposed through ``fire``) for the high-frequency dataset example.
+
+    ``get_data`` builds and prints the two datasets; ``dump_and_load_dataset`` pickles
+    them, reloads them and re-initialises them on a new time range.
+    """
+
+    # Extra qlib init settings: the custom operators from highfreq_ops, and no expression cache
+    SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
+
+    MARKET = "all"
+
+    start_time = "2020-09-15 00:00:00"
+    end_time = "2021-01-18 16:00:00"
+    train_end_time = "2020-11-30 16:00:00"
+    test_start_time = "2020-12-01 00:00:00"
+
+    # kwargs of HighFreqHandler (features normalised by HighFreqNorm, fitted on the train range)
+    DATA_HANDLER_CONFIG0 = {
+        "start_time": start_time,
+        "end_time": end_time,
+        "fit_start_time": start_time,
+        "fit_end_time": train_end_time,
+        "instruments": MARKET,
+        "infer_processors": [{"class": "HighFreqNorm", "module_path": "highfreq_processor", "kwargs": {}}],
+    }
+    # kwargs of HighFreqBacktestHandler (no processors)
+    DATA_HANDLER_CONFIG1 = {
+        "start_time": start_time,
+        "end_time": end_time,
+        "instruments": MARKET,
+    }
+
+    # Configs of the two DatasetH instances; both share the same train/test segments
+    task = {
+        "dataset": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                "handler": {
+                    "class": "HighFreqHandler",
+                    "module_path": "highfreq_handler",
+                    "kwargs": DATA_HANDLER_CONFIG0,
+                },
+                "segments": {
+                    "train": (start_time, train_end_time),
+                    "test": (
+                        test_start_time,
+                        end_time,
+                    ),
+                },
+            },
+        },
+        "dataset_backtest": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                "handler": {
+                    "class": "HighFreqBacktestHandler",
+                    "module_path": "highfreq_handler",
+                    "kwargs": DATA_HANDLER_CONFIG1,
+                },
+                "segments": {
+                    "train": (start_time, train_end_time),
+                    "test": (
+                        test_start_time,
+                        end_time,
+                    ),
+                },
+            },
+        },
+    }
+
+    def _init_qlib(self):
+        """Fetch the 1min data if it is not present yet, then initialise qlib."""
+        # use yahoo_cn_1min data
+        # SPEC_CONF entries override HIGH_FREQ_CONFIG entries with the same key
+        QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF}
+        provider_uri = QLIB_INIT_CONFIG.get("provider_uri")
+        GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True)
+        qlib.init(**QLIB_INIT_CONFIG)
+
+    def _prepare_calender_cache(self):
+        """Preload the 1min calendar and its per-day variant so they are cached."""
+
+        # This code uses the copy-on-write feature of Linux to avoid calculating the calendar multiple times in the subprocess
+        # This code may accelerate, but may not be useful on Windows and macOS
+        Cal.calendar(freq="1min")
+        get_calendar_day(freq="1min")
+
+    def get_data(self):
+        """Use the datasets to get high-freq data and print the train/test parts."""
+        self._init_qlib()
+        self._prepare_calender_cache()
+
+        dataset = init_instance_by_config(self.task["dataset"])
+        xtrain, xtest = dataset.prepare(["train", "test"])
+        print(xtrain, xtest)
+
+        dataset_backtest = init_instance_by_config(self.task["dataset_backtest"])
+        backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
+        print(backtest_train, backtest_test)
+
+        return
+
+    def dump_and_load_dataset(self):
+        """Dump both datasets to disk, reload them, reconfigure them and print the new test data.
+
+        Writes ``dataset.pkl`` and ``dataset_backtest.pkl`` into the current working directory.
+        """
+        self._init_qlib()
+        self._prepare_calender_cache()
+        dataset = init_instance_by_config(self.task["dataset"])
+        dataset_backtest = init_instance_by_config(self.task["dataset_backtest"])
+
+        ##=============dump dataset=============
+        dataset.to_pickle(path="dataset.pkl")
+        dataset_backtest.to_pickle(path="dataset_backtest.pkl")
+
+        del dataset, dataset_backtest
+        ##=============reload dataset=============
+        # NOTE(review): pickle.load runs arbitrary code from the file; only load files this script wrote
+        with open("dataset.pkl", "rb") as file_dataset:
+            dataset = pickle.load(file_dataset)
+
+        with open("dataset_backtest.pkl", "rb") as file_dataset_backtest:
+            dataset_backtest = pickle.load(file_dataset_backtest)
+
+        self._prepare_calender_cache()
+        ##=============reinit dataset=============
+        # Point the reloaded datasets at a new time range and a new "test" segment
+        dataset.config(
+            handler_kwargs={
+                "start_time": "2021-01-19 00:00:00",
+                "end_time": "2021-01-25 16:00:00",
+            },
+            segments={
+                "test": (
+                    "2021-01-19 00:00:00",
+                    "2021-01-25 16:00:00",
+                ),
+            },
+        )
+        dataset.setup_data(
+            handler_kwargs={
+                "init_type": DataHandlerLP.IT_LS,
+            },
+        )
+        dataset_backtest.config(
+            handler_kwargs={
+                "start_time": "2021-01-19 00:00:00",
+                "end_time": "2021-01-25 16:00:00",
+            },
+            segments={
+                "test": (
+                    "2021-01-19 00:00:00",
+                    "2021-01-25 16:00:00",
+                ),
+            },
+        )
+        dataset_backtest.setup_data(handler_kwargs={})
+
+        ##=============get data=============
+        xtest = dataset.prepare("test")
+        backtest_test = dataset_backtest.prepare("test")
+
+        print(xtest, backtest_test)
+        return
+
+
+if __name__ == "__main__":
+    # Expose the workflow methods as sub-commands, e.g. `python workflow.py get_data`
+    fire.Fire(HighfreqWorkflow)
--- a/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
+++ b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
@@ -0,0 +1,65 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data_1min"
+    region: cn
+market: &market 'csi300'
+# Time boundaries shared by the handler config and the dataset segments below
+start_time: &start_time "2020-09-15 00:00:00"
+end_time: &end_time "2021-01-18 16:00:00"
+train_end_time: &train_end_time "2020-11-15 16:00:00"
+valid_start_time: &valid_start_time "2020-11-16 00:00:00"
+valid_end_time: &valid_end_time "2020-11-30 16:00:00"
+test_start_time: &test_start_time "2020-12-01 00:00:00"
+data_handler_config: &data_handler_config
+    start_time: *start_time
+    end_time: *end_time
+    # Processors are fitted on the training range only
+    fit_start_time: *start_time
+    fit_end_time: *train_end_time
+    instruments: *market
+    freq: '1min'
+    infer_processors:
+        - class: 'RobustZScoreNorm'
+          kwargs:
+              fields_group: 'feature'
+              clip_outlier: false
+        - class: "Fillna"
+          kwargs:
+              fields_group: 'feature'
+    learn_processors:
+        - class: 'DropnaLabel'
+        - class: 'CSRankNorm'
+          kwargs:
+              fields_group: 'label'
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+
+task:
+    model:
+        class: "HFLGBModel"
+        module_path: "qlib.contrib.model.highfreq_gdbt_model"
+        kwargs:
+            objective: 'binary'
+            metric: ['binary_logloss','auc']
+            verbosity: -1
+            learning_rate: 0.01
+            max_depth: 8
+            num_leaves: 150
+            lambda_l1: 1.5
+            lambda_l2: 1
+            num_threads: 20
+    dataset:
+        class: "DatasetH"
+        module_path: "qlib.data.dataset"
+        kwargs:
+            handler:
+                class: "Alpha158"
+                module_path: "qlib.contrib.data.handler"
+                kwargs: *data_handler_config
+            segments:
+                train: [*start_time, *train_end_time]
+                # Validation starts at its own boundary so it does not share the train end timestamp
+                valid: [*valid_start_time, *valid_end_time]
+                test: [*test_start_time, *end_time]
+    record:
+        - class: "SignalRecord"
+          module_path: "qlib.workflow.record_temp"
+          kwargs: {}
+        - class: "HFSignalRecord"
+          module_path: "qlib.workflow.record_temp"
+          kwargs: {}
--- a/examples/hyperparameter/LightGBM/Readme.md
+++ b/examples/hyperparameter/LightGBM/Readme.md
@@ -0,0 +1,23 @@
+# LightGBM hyperparameter
+
+## Alpha158
+First terminal
+```
+optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3
+optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3
+```
+Second terminal
+```
+python hyperparameter_158.py
+```
+
+## Alpha360
+First terminal
+```
+optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3
+optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3
+```
+Second terminal
+```
+python hyperparameter_360.py
+```
--- a/examples/hyperparameter/LightGBM/hyperparameter_158.py
+++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py
@@ -0,0 +1,46 @@
+import qlib
+import optuna
+from qlib.config import REG_CN
+from qlib.utils import init_instance_by_config
+from qlib.tests.config import CSI300_DATASET_CONFIG
+from qlib.tests.data import GetData
+
+
+def objective(trial):
+    task = {
+        "model": {
+            "class": "LGBModel",
+            "module_path": "qlib.contrib.model.gbdt",
+            "kwargs": {
+                "loss": "mse",
+                "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1),
+                "learning_rate": trial.suggest_uniform("learning_rate", 0, 1),
+                "subsample": trial.suggest_uniform("subsample", 0, 1),
+                "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4),
+                "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4),
+                "max_depth": 10,
+                "num_leaves": trial.suggest_int("num_leaves", 1, 1024),
+                "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
+                "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
+                "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
+                "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50),
+                "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
+            },
+        },
+    }
+    evals_result = dict()
+    model = init_instance_by_config(task["model"])
+    model.fit(dataset, evals_result=evals_result)
+    return min(evals_result["valid"])
+
+
+if __name__ == "__main__":
+
+    provider_uri = "~/.qlib/qlib_data/cn_data"
+    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
+    qlib.init(provider_uri=provider_uri, region="cn")
+
+    dataset = init_instance_by_config(CSI300_DATASET_CONFIG)
+
+    study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3")
+    study.optimize(objective, n_jobs=6)
--- a/examples/hyperparameter/LightGBM/hyperparameter_360.py
+++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py
@@ -0,0 +1,49 @@
+import qlib
+import optuna
+from qlib.config import REG_CN
+from qlib.utils import init_instance_by_config
+from qlib.tests.data import GetData
+from qlib.tests.config import get_dataset_config, CSI300_MARKET, DATASET_ALPHA360_CLASS
+
+# Dataset config built by get_dataset_config for the CSI300 market and the Alpha360 dataset class.
+DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS)
+
+
+def objective(trial):
+    """Train one LGBModel with hyperparameters sampled by ``trial`` and score it.
+
+    Args:
+        trial (optuna.trial.Trial): the trial used to sample every tunable LightGBM parameter.
+
+    Returns:
+        the minimum of ``evals_result["valid"]`` after fitting, which is the value the study optimizes
+        (presumably the best validation loss recorded by ``LGBModel.fit`` -- confirm against qlib).
+    """
+    task = {
+        "model": {
+            "class": "LGBModel",
+            "module_path": "qlib.contrib.model.gbdt",
+            "kwargs": {
+                "loss": "mse",
+                "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1),
+                "learning_rate": trial.suggest_uniform("learning_rate", 0, 1),
+                "subsample": trial.suggest_uniform("subsample", 0, 1),
+                "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4),
+                "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4),
+                "max_depth": 10,
+                # LightGBM requires num_leaves > 1, so the search space must start at 2;
+                # sampling 1 makes LightGBM reject the parameters and the trial fail.
+                "num_leaves": trial.suggest_int("num_leaves", 2, 1024),
+                "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
+                "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
+                "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
+                "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50),
+                "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
+            },
+        },
+    }
+
+    # `dataset` is the module-level dataset created in the `__main__` block below.
+    evals_result = dict()
+    model = init_instance_by_config(task["model"])
+    model.fit(dataset, evals_result=evals_result)
+    return min(evals_result["valid"])
+
+
+if __name__ == "__main__":
+
+    # Fetch the default CN data (exists_skip=True is passed so existing data is kept), then initialise qlib on it.
+    provider_uri = "~/.qlib/qlib_data/cn_data"
+    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
+    qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+    # `objective` reads this module-level name.
+    dataset = init_instance_by_config(DATASET_CONFIG)
+
+    # create_study(load_if_exists=True) reuses the stored study when it exists and creates it otherwise,
+    # instead of relying on the Study constructor, which needs the study to be present in the storage.
+    study = optuna.create_study(
+        study_name="LGBM_360", storage="sqlite:///db.sqlite3", direction="minimize", load_if_exists=True
+    )
+    # No n_trials/timeout is passed, so the search runs until it is interrupted.
+    study.optimize(objective, n_jobs=6)
--- a/examples/hyperparameter/LightGBM/requirements.txt
+++ b/examples/hyperparameter/LightGBM/requirements.txt
@@ -0,0 +1,5 @@
+pandas==1.1.2
+numpy==1.17.4
+lightgbm==3.1.0
+optuna==2.7.0
+optuna-dashboard==0.4.1
--- a/examples/model_interpreter/feature.py
+++ b/examples/model_interpreter/feature.py
@@ -0,0 +1,32 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+
+import qlib
+from qlib.config import REG_CN
+
+from qlib.utils import init_instance_by_config
+from qlib.tests.data import GetData
+from qlib.tests.config import CSI300_GBDT_TASK
+
+
+if __name__ == "__main__":
+
+    # fetch the default CN data into the target dir, then point qlib at it
+    data_dir = "~/.qlib/qlib_data/cn_data"
+    GetData().qlib_data(target_dir=data_dir, region=REG_CN, exists_skip=True)
+    qlib.init(provider_uri=data_dir, region=REG_CN)
+
+    # build the model and the dataset from the shared GBDT task config, then train
+    gbdt_task = CSI300_GBDT_TASK
+    model = init_instance_by_config(gbdt_task["model"])
+    dataset = init_instance_by_config(gbdt_task["dataset"])
+    model.fit(dataset)
+
+    # report the feature importance of the trained model
+    print("feature importance:", model.get_feature_importance(), sep="\n")
--- a/examples/model_rolling/task_manager_rolling.py
+++ b/examples/model_rolling/task_manager_rolling.py
@@ -0,0 +1,105 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how a TrainerRM works based on TaskManager with rolling tasks.
+After training, how to collect the rolling results will be shown in task_collecting.
+"""
+
+from pprint import pprint
+
+import fire
+import qlib
+from qlib.config import REG_CN
+from qlib.workflow import R
+from qlib.workflow.task.gen import RollingGen, task_generator
+from qlib.workflow.task.manage import TaskManager
+from qlib.workflow.task.collect import RecorderCollector
+from qlib.model.ens.group import RollingGroup
+from qlib.model.trainer import TrainerRM
+from qlib.tests.config import CSI100_RECORD_LGB_TASK_CONFIG, CSI100_RECORD_XGBOOST_TASK_CONFIG
+
+
+class RollingTaskExample:
+    """Generate rolling tasks, train them with TrainerRM and collect the recorded results."""
+
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region=REG_CN,
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        experiment_name="rolling_exp",
+        task_pool="rolling_task",
+        task_config=None,
+        rolling_step=550,
+        rolling_type=RollingGen.ROLL_SD,
+    ):
+        """
+        Init qlib with the MongoDB settings and keep the rolling configuration.
+
+        Args:
+            provider_uri (str, optional): passed to ``qlib.init`` as the provider uri.
+            region (str, optional): passed to ``qlib.init`` as the region.
+            task_url (str, optional): the ``task_url`` entry of the mongo config.
+            task_db_name (str, optional): the ``task_db_name`` entry of the mongo config.
+            experiment_name (str, optional): experiment used for training, resetting and collecting.
+            task_pool (str, optional): task pool name handed to TaskManager and TrainerRM.
+            task_config (list, optional): task templates; defaults to the CSI100 XGBoost and LightGBM record tasks.
+            rolling_step (int, optional): the ``step`` of the RollingGen.
+            rolling_type (optional): the ``rtype`` of the RollingGen.
+        """
+        self.task_config = (
+            [CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG] if task_config is None else task_config
+        )
+        self.experiment_name = experiment_name
+        self.task_pool = task_pool
+        qlib.init(
+            provider_uri=provider_uri,
+            region=region,
+            mongo={"task_url": task_url, "task_db_name": task_db_name},
+        )
+        self.rolling_gen = RollingGen(step=rolling_step, rtype=rolling_type)
+
+    def reset(self):
+        """Put everything back to the first status; be careful to save important data before calling it."""
+        print("========== reset ==========")
+        TaskManager(task_pool=self.task_pool).remove()
+        experiment = R.get_exp(experiment_name=self.experiment_name)
+        for recorder_id in experiment.list_recorders():
+            experiment.delete_recorder(recorder_id)
+
+    def task_generating(self):
+        """Expand the task templates with the rolling generator, print them and return them."""
+        print("========== task_generating ==========")
+        # the rolling generator produces the different date segments
+        rolling_tasks = task_generator(tasks=self.task_config, generators=self.rolling_gen)
+        pprint(rolling_tasks)
+        return rolling_tasks
+
+    def task_training(self, tasks):
+        """Train ``tasks`` with a TrainerRM bound to this experiment and task pool."""
+        print("========== task_training ==========")
+        TrainerRM(self.experiment_name, self.task_pool).train(tasks)
+
+    def task_collecting(self):
+        """Collect the recorders of the experiment, keeping only "LGBModel" ones, and print the result."""
+        print("========== task_collecting ==========")
+
+        def model_and_test_segment(recorder):
+            # key of a recorder: (model class, test segment) read from its stored task
+            stored_task = recorder.load_object("task")
+            return stored_task["model"]["class"], stored_task["dataset"]["kwargs"]["segments"]["test"]
+
+        def only_lgb(recorder):
+            # only choose the results of "LGBModel"
+            return model_and_test_segment(recorder)[0] == "LGBModel"
+
+        collector = RecorderCollector(
+            experiment=self.experiment_name,
+            process_list=RollingGroup(),
+            rec_key_func=model_and_test_segment,
+            rec_filter_func=only_lgb,
+        )
+        print(collector())
+
+    def main(self):
+        """Run the whole example: reset, generate tasks, train them and collect the results."""
+        self.reset()
+        self.task_training(self.task_generating())
+        self.task_collecting()
+
+
+if __name__ == "__main__":
+    ## The class is handed to fire, so constructor arguments become command line flags.
+    ## To see the whole process with your own parameters, use the command below
+    # python task_manager_rolling.py main --experiment_name="your_exp_name"
+    fire.Fire(RollingTaskExample)
--- a/examples/online_srv/online_management_simulate.py
+++ b/examples/online_srv/online_management_simulate.py
@@ -0,0 +1,92 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how to simulate the OnlineManager based on rolling tasks.
+"""
+
+import fire
+import qlib
+from qlib.model.trainer import DelayTrainerR, DelayTrainerRM, TrainerR, TrainerRM
+from qlib.workflow import R
+from qlib.workflow.online.manager import OnlineManager
+from qlib.workflow.online.strategy import RollingStrategy
+from qlib.workflow.task.gen import RollingGen
+from qlib.workflow.task.manage import TaskManager
+from qlib.tests.config import CSI100_RECORD_LGB_TASK_CONFIG, CSI100_RECORD_XGBOOST_TASK_CONFIG
+
+
+class OnlineSimulationExample:
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region="cn",
+        exp_name="rolling_exp",
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        task_pool="rolling_task",
+        rolling_step=80,
+        start_time="2018-09-10",
+        end_time="2018-10-31",
+        tasks=None,
+    ):
+        """
+        Init OnlineSimulationExample.
+
+        Args:
+            provider_uri (str, optional): the provider uri. Defaults to "~/.qlib/qlib_data/cn_data".
+            region (str, optional): the stock region. Defaults to "cn".
+            exp_name (str, optional): the experiment name. Defaults to "rolling_exp".
+            task_url (str, optional): your MongoDB url. Defaults to "mongodb://10.0.0.4:27017/".
+            task_db_name (str, optional): database name. Defaults to "rolling_db".
+            task_pool (str, optional): the task pool name (a task pool is a collection in MongoDB). Defaults to "rolling_task".
+            rolling_step (int, optional): the step for rolling. Defaults to 80.
+            start_time (str, optional): the start time of simulating. Defaults to "2018-09-10".
+            end_time (str, optional): the end time of simulating. Defaults to "2018-10-31".
+            tasks (dict or list[dict]): the task configs to roll and train. Defaults to the CSI100 XGBoost and LightGBM record tasks.
+        """
+        tasks = [CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG] if tasks is None else tasks
+        self.tasks = tasks
+        self.exp_name, self.task_pool = exp_name, task_pool
+        self.start_time, self.end_time = start_time, end_time
+        qlib.init(
+            provider_uri=provider_uri,
+            region=region,
+            mongo={"task_url": task_url, "task_db_name": task_db_name},
+        )
+        # The rolling tasks generator. ds_extra_mod_func is None because the simulation only runs
+        # to 2018-10-31, so the handler end time does not need to be changed.
+        self.rolling_gen = RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD, ds_extra_mod_func=None)
+        # TrainerR, TrainerRM or DelayTrainerR can be used here as well
+        self.trainer = DelayTrainerRM(self.exp_name, self.task_pool)
+        strategy = RollingStrategy(exp_name, task_template=tasks, rolling_gen=self.rolling_gen)
+        self.rolling_online_manager = OnlineManager(strategy, trainer=self.trainer, begin_time=self.start_time)
+
+    def reset(self):
+        """Put everything back to the first status; be careful to save important data before calling it."""
+        TaskManager(self.task_pool).remove()
+        experiment = R.get_exp(experiment_name=self.exp_name)
+        for recorder_id in experiment.list_recorders():
+            experiment.delete_recorder(recorder_id)
+
+    def main(self):
+        """Run the whole workflow automatically: reset, simulate, then print the collected results and signals."""
+        manager = self.rolling_online_manager
+        print("========== reset ==========")
+        self.reset()
+        print("========== simulate ==========")
+        manager.simulate(end_time=self.end_time)
+        print("========== collect results ==========")
+        print(manager.get_collector()())
+        print("========== signals ==========")
+        print(manager.get_signals())
+
+
+if __name__ == "__main__":
+    ## to run all workflow automatically with your own parameters, use the command below
+    ## (the flag names are the constructor parameters of OnlineSimulationExample)
+    # python online_management_simulate.py main --exp_name="your_exp_name" --rolling_step=60
+    fire.Fire(OnlineSimulationExample)
--- a/examples/online_srv/rolling_online_management.py
+++ b/examples/online_srv/rolling_online_management.py
@@ -0,0 +1,130 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how OnlineManager works with rolling tasks.
+There are four parts including first train, routine 1, add strategy and routine 2.
+Firstly, the OnlineManager will finish the first training and set trained models to `online` models.
+Next, the OnlineManager will finish a routine process, including update online prediction -> prepare tasks -> prepare new models -> prepare signals
+Then, we will add some new strategies to the OnlineManager. This will finish first training of new strategies.
+Finally, the OnlineManager will finish second routine and update all strategies.
+"""
+
+import os
+import fire
+import qlib
+from qlib.workflow import R
+from qlib.workflow.online.strategy import RollingStrategy
+from qlib.workflow.task.gen import RollingGen
+from qlib.workflow.online.manager import OnlineManager
+from qlib.tests.config import CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG
+
+
+class RollingOnlineExample:
+    """Drive an OnlineManager over rolling tasks: first train, routine, add strategies, routine again."""
+
+    # the OnlineManager is dumped to this file so that it can be loaded again when calling routine
+    _ROLLING_MANAGER_PATH = ".RollingOnlineExample"
+
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region="cn",
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        rolling_step=550,
+        tasks=None,
+        add_tasks=None,
+    ):
+        """
+        Init qlib with the MongoDB settings and build an OnlineManager with one strategy per task.
+
+        Args:
+            provider_uri (str, optional): passed to ``qlib.init`` as the provider uri.
+            region (str, optional): passed to ``qlib.init`` as the region.
+            task_url (str, optional): your MongoDB url.
+            task_db_name (str, optional): database name.
+            rolling_step (int, optional): the ``step`` of every RollingGen.
+            tasks (list[dict], optional): tasks of the initial strategies. Defaults to the CSI100 XGBoost record task.
+            add_tasks (list[dict], optional): tasks of the strategies added later. Defaults to the CSI100 LightGBM record task.
+        """
+        self.add_tasks = [CSI100_RECORD_LGB_TASK_CONFIG] if add_tasks is None else add_tasks
+        self.tasks = [CSI100_RECORD_XGBOOST_TASK_CONFIG] if tasks is None else tasks
+        self.rolling_step = rolling_step
+        qlib.init(
+            provider_uri=provider_uri,
+            region=region,
+            mongo={"task_url": task_url, "task_db_name": task_db_name},
+        )
+        self.rolling_online_manager = OnlineManager(self._rolling_strategies(self.tasks, rolling_step))
+
+    @staticmethod
+    def _rolling_strategies(tasks, rolling_step):
+        """Return one RollingStrategy per task, named after the task's model class."""
+        # NOTE: Assumption: The model class can specify only one strategy
+        return [
+            RollingStrategy(
+                task["model"]["class"],
+                task,
+                RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD),
+            )
+            for task in tasks
+        ]
+
+    def _dump_manager(self):
+        """Pickle the current OnlineManager to ``_ROLLING_MANAGER_PATH``."""
+        print("========== dump ==========")
+        self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
+
+    def _load_manager(self):
+        """Replace the current OnlineManager with the one stored at ``_ROLLING_MANAGER_PATH``."""
+        print("========== load ==========")
+        self.rolling_online_manager = OnlineManager.load(self._ROLLING_MANAGER_PATH)
+
+    def reset(self):
+        """Put everything back to the first status; be careful to save important data before calling it."""
+        for task in self.tasks + self.add_tasks:
+            experiment = R.get_exp(experiment_name=task["model"]["class"])
+            for recorder_id in experiment.list_recorders():
+                experiment.delete_recorder(recorder_id)
+
+        if os.path.exists(self._ROLLING_MANAGER_PATH):
+            os.remove(self._ROLLING_MANAGER_PATH)
+
+    def first_run(self):
+        """Reset, run the first training, print the collected results and dump the manager."""
+        print("========== reset ==========")
+        self.reset()
+        print("========== first_run ==========")
+        self.rolling_online_manager.first_train()
+        print("========== collect results ==========")
+        print(self.rolling_online_manager.get_collector()())
+        self._dump_manager()
+
+    def routine(self):
+        """Load the dumped manager, run one routine, print results and signals, then dump it again."""
+        self._load_manager()
+        print("========== routine ==========")
+        self.rolling_online_manager.routine()
+        print("========== collect results ==========")
+        print(self.rolling_online_manager.get_collector()())
+        print("========== signals ==========")
+        print(self.rolling_online_manager.get_signals())
+        self._dump_manager()
+
+    def add_strategy(self):
+        """Load the dumped manager, add the strategies built from ``add_tasks`` and dump it again."""
+        self._load_manager()
+        print("========== add strategy ==========")
+        new_strategies = self._rolling_strategies(self.add_tasks, self.rolling_step)
+        self.rolling_online_manager.add_strategy(strategies=new_strategies)
+        self._dump_manager()
+
+    def main(self):
+        """Run the four parts in order: first run, routine, add strategy, routine."""
+        self.first_run()
+        self.routine()
+        self.add_strategy()
+        self.routine()
+
+
+if __name__ == "__main__":
+    ####### to train the first version's models, use the command below
+    # python rolling_online_management.py first_run
+
+    ####### to update the models and predictions after the trading time, use the command below
+    # python rolling_online_management.py routine
+
+    ####### to define your own parameters, use `--` with the constructor parameters of RollingOnlineExample
+    # python rolling_online_management.py first_run --task_db_name='your_db_name' --rolling_step=40
+    fire.Fire(RollingOnlineExample)
--- a/examples/online_srv/update_online_pred.py
+++ b/examples/online_srv/update_online_pred.py
@@ -0,0 +1,54 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how OnlineTool works when we need to update predictions.
+There are two parts including first_train and update_online_pred.
+Firstly, we will finish the training and set the trained models to the `online` models.
+Next, we will finish updating online predictions.
+"""
+import copy
+import fire
+import qlib
+from qlib.config import REG_CN
+from qlib.model.trainer import task_train
+from qlib.workflow.online.utils import OnlineToolR
+from qlib.tests.config import CSI300_GBDT_TASK
+
+# Work on a deep copy so that the shared CSI300_GBDT_TASK config is left untouched.
+task = copy.deepcopy(CSI300_GBDT_TASK)
+
+# Attach a "record" entry that points at SignalRecord in qlib.workflow.record_temp.
+task["record"] = {
+    "class": "SignalRecord",
+    "module_path": "qlib.workflow.record_temp",
+}
+
+
+class UpdatePredExample:
+    """Train a model, tag it as the online model and update the online predictions."""
+
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region=REG_CN,
+        experiment_name="online_srv",
+        task_config=task,
+    ):
+        """
+        Init qlib and create the OnlineToolR of the experiment.
+
+        Args:
+            provider_uri (str, optional): passed to ``qlib.init`` as the provider uri.
+            region (str, optional): passed to ``qlib.init`` as the region.
+            experiment_name (str, optional): experiment used for training and by the online tool.
+            task_config (dict, optional): the task to train. Defaults to the module-level ``task``.
+        """
+        qlib.init(provider_uri=provider_uri, region=region)
+        self.task_config = task_config
+        self.experiment_name = experiment_name
+        self.online_tool = OnlineToolR(experiment_name)
+
+    def first_train(self):
+        """Train the task and set the resulting recorder to online model."""
+        trained = task_train(self.task_config, experiment_name=self.experiment_name)
+        self.online_tool.reset_online_tag(trained)
+
+    def update_online_pred(self):
+        """Delegate to ``OnlineToolR.update_online_pred``."""
+        self.online_tool.update_online_pred()
+
+    def main(self):
+        """Run the whole process: first train, then update the online predictions."""
+        self.first_train()
+        self.update_online_pred()
+
+
+if __name__ == "__main__":
+    ## The class is handed to fire, so its methods are the commands shown below.
+    ## to train a model and set it to online model, use the command below
+    # python update_online_pred.py first_train
+    ## to update online predictions once a day, use the command below
+    # python update_online_pred.py update_online_pred
+    ## to see the whole process with your own parameters, use the command below
+    # python update_online_pred.py main --experiment_name="your_exp_name"
+    fire.Fire(UpdatePredExample)
--- a/examples/rolling_process_data/README.md
+++ b/examples/rolling_process_data/README.md
@@ -0,0 +1,17 @@
+# Rolling Process Data
+
+This workflow is an example for `Rolling Process Data`.
+
+## Background
+
+When models are trained in a rolling fashion, the data also needs to be generated for each rolling window. When the rolling window moves, the training data changes, and the processors' learnable state (such as the standard deviation, mean, etc.) changes as well.
+
+In order to avoid regenerating data, this example uses the `DataHandler-based DataLoader` to load the raw features that are not related to the rolling window, and then uses Processors to generate the processed features that depend on the rolling window.
+
+
+## Run the Code
+
+Run the example by running the following command:
+```bash
+    python workflow.py rolling_process
+```
--- a/examples/rolling_process_data/rolling_handler.py
+++ b/examples/rolling_process_data/rolling_handler.py
@@ -0,0 +1,32 @@
+from qlib.data.dataset.handler import DataHandlerLP
+from qlib.data.dataset.loader import DataLoaderDH
+from qlib.contrib.data.handler import check_transform_proc
+
+
+class RollingDataHandler(DataHandlerLP):
+    """DataHandlerLP whose data loader is configured as a ``DataLoaderDH``."""
+
+    def __init__(
+        self,
+        start_time=None,
+        end_time=None,
+        infer_processors=None,
+        learn_processors=None,
+        fit_start_time=None,
+        fit_end_time=None,
+        data_loader_kwargs=None,
+    ):
+        """
+        Args:
+            start_time: forwarded to ``DataHandlerLP`` as ``start_time``.
+            end_time: forwarded to ``DataHandlerLP`` as ``end_time``.
+            infer_processors (list, optional): inference processors; ``None`` means no processor.
+            learn_processors (list, optional): learning processors; ``None`` means no processor.
+            fit_start_time: passed to ``check_transform_proc`` together with the processors.
+            fit_end_time: passed to ``check_transform_proc`` together with the processors.
+            data_loader_kwargs (dict, optional): kwargs of the ``DataLoaderDH`` config; ``None`` means no kwargs.
+        """
+        # None is used as the default instead of [] / {} so that no mutable default object
+        # is shared between calls.
+        infer_processors = [] if infer_processors is None else infer_processors
+        learn_processors = [] if learn_processors is None else learn_processors
+        data_loader_kwargs = {} if data_loader_kwargs is None else data_loader_kwargs
+
+        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
+        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
+
+        data_loader = {
+            "class": "DataLoaderDH",
+            "kwargs": {**data_loader_kwargs},
+        }
+
+        super().__init__(
+            instruments=None,
+            start_time=start_time,
+            end_time=end_time,
+            data_loader=data_loader,
+            infer_processors=infer_processors,
+            learn_processors=learn_processors,
+        )
--- a/examples/rolling_process_data/workflow.py
+++ b/examples/rolling_process_data/workflow.py
@@ -0,0 +1,137 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+import qlib
+import fire
+import pickle
+
+from datetime import datetime
+from qlib.config import REG_CN
+from qlib.data.dataset.handler import DataHandlerLP
+from qlib.utils import init_instance_by_config
+from qlib.tests.data import GetData
+
+
+class RollingDataWorkflow:
+    """Dump an Alpha158 handler once, then re-process its data for several rolling windows."""
+
+    MARKET = "csi300"
+    start_time = "2010-01-01"
+    end_time = "2019-12-31"
+    rolling_cnt = 5
+
+    def _init_qlib(self):
+        """Fetch the default cn_data (if needed) and initialize qlib on it."""
+        data_dir = "~/.qlib/qlib_data/cn_data"  # target_dir
+        GetData().qlib_data(target_dir=data_dir, region=REG_CN, exists_skip=True)
+        qlib.init(provider_uri=data_dir, region=REG_CN)
+
+    def _dump_pre_handler(self, path):
+        """Create an Alpha158 handler without processors over the whole time range and pickle it to ``path``."""
+        pre_handler = init_instance_by_config(
+            {
+                "class": "Alpha158",
+                "module_path": "qlib.contrib.data.handler",
+                "kwargs": {
+                    "start_time": self.start_time,
+                    "end_time": self.end_time,
+                    "instruments": self.MARKET,
+                    "infer_processors": [],
+                    "learn_processors": [],
+                },
+            }
+        )
+        pre_handler.config(dump_all=True)
+        pre_handler.to_pickle(path)
+
+    def _load_pre_handler(self, path):
+        """Return the handler unpickled from ``path``."""
+        # NOTE: pickle.load should only be used on files written by _dump_pre_handler.
+        with open(path, "rb") as handler_file:
+            return pickle.load(handler_file)
+
+    def rolling_process(self):
+        """Build the rolling dataset and print train/valid/test data for every rolling window."""
+        self._init_qlib()
+        handler_path = "pre_handler.pkl"
+        self._dump_pre_handler(handler_path)
+        pre_handler = self._load_pre_handler(handler_path)
+
+        # (start, end) of each segment in the first window, as (year, month, day)
+        train_span = ((2010, 1, 1), (2012, 12, 31))
+        valid_span = ((2013, 1, 1), (2013, 12, 31))
+        test_span = ((2014, 1, 1), (2014, 12, 31))
+
+        def shifted(ymd, years=0):
+            # datetime of ``ymd`` with ``years`` added to its year
+            return datetime(ymd[0] + years, *ymd[1:])
+
+        def segments_for(years=0):
+            # train/valid/test segments of the window that starts ``years`` later
+            return {
+                "train": (shifted(train_span[0], years), shifted(train_span[1], years)),
+                "valid": (shifted(valid_span[0], years), shifted(valid_span[1], years)),
+                "test": (shifted(test_span[0], years), shifted(test_span[1], years)),
+            }
+
+        handler_config = {
+            "class": "RollingDataHandler",
+            "module_path": "rolling_handler",
+            "kwargs": {
+                "start_time": shifted(train_span[0]),
+                "end_time": shifted(test_span[1]),
+                "fit_start_time": shifted(train_span[0]),
+                "fit_end_time": shifted(train_span[1]),
+                "infer_processors": [
+                    {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature"}},
+                ],
+                "learn_processors": [
+                    {"class": "DropnaLabel"},
+                    {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
+                ],
+                "data_loader_kwargs": {
+                    "handler_config": pre_handler,
+                },
+            },
+        }
+        dataset = init_instance_by_config(
+            {
+                "class": "DatasetH",
+                "module_path": "qlib.data.dataset",
+                "kwargs": {
+                    "handler": handler_config,
+                    "segments": segments_for(),
+                },
+            }
+        )
+
+        for rolling_offset in range(self.rolling_cnt):
+
+            print(f"===========rolling{rolling_offset} start===========")
+            # the first window uses the dataset as created; later windows are shifted by whole years
+            if rolling_offset:
+                dataset.config(
+                    handler_kwargs={
+                        "start_time": shifted(train_span[0], rolling_offset),
+                        "end_time": shifted(test_span[1], rolling_offset),
+                        "processor_kwargs": {
+                            "fit_start_time": shifted(train_span[0], rolling_offset),
+                            "fit_end_time": shifted(train_span[1], rolling_offset),
+                        },
+                    },
+                    segments=segments_for(rolling_offset),
+                )
+                dataset.setup_data(handler_kwargs={"init_type": DataHandlerLP.IT_FIT_SEQ})
+
+            dtrain, dvalid, dtest = dataset.prepare(["train", "valid", "test"])
+            print(dtrain, dvalid, dtest)
+            ## print or dump data
+            print(f"===========rolling{rolling_offset} end===========")
+
+if __name__ == "__main__":
+    # The class is handed to fire; run the example with: python workflow.py rolling_process
+    fire.Fire(RollingDataWorkflow)
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -5,16 +5,15 @@ import os
 import sys
 import fire
 import time
-import venv
 import glob
 import shutil
 import signal
 import inspect
 import tempfile
-import traceback
 import functools
 import statistics
 import subprocess
+from datetime import datetime
 from pathlib import Path
 from operator import xor
 from pprint import pprint
@@ -22,8 +21,7 @@ from pprint import pprint
 import qlib
 from qlib.config import REG_CN
 from qlib.workflow import R
-from qlib.workflow.cli import workflow
-from qlib.utils import exists_qlib_data
+from qlib.tests.data import GetData


 # init qlib
@@ -38,15 +36,9 @@ exp_manager = {
        "default_exp_name": "Experiment",
    },
 }
-if not exists_qlib_data(provider_uri):
-    print(f"Qlib data is not found in {provider_uri}")
-    sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-    from get_data import GetData

-    GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
 qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager)
-if os.path.isdir(exp_path):
-    shutil.rmtree(exp_path)

 # decorator to check the arguments
 def only_allow_defined_args(function_to_decorate):
@@ -70,9 +62,9 @@ def handler(signum, frame):
    os.system("kill -9 %d" % os.getpid())


-signal.signal(signal.SIGTSTP, handler)
 signal.signal(signal.SIGINT, handler)

+
 # function to calculate the mean and std of a list in the results dictionary
 def cal_mean_std(results) -> dict:
    mean_std = dict()
@@ -136,9 +128,9 @@ def get_all_folders(models, exclude) -> dict:


 # function to get all the files under the model folder
-def get_all_files(folder_path) -> (str, str):
-    yaml_path = str(Path(f"{folder_path}") / "*.yaml")
-    req_path = str(Path(f"{folder_path}") / "*.txt")
+def get_all_files(folder_path, dataset) -> (str, str):
+    yaml_path = str(Path(f"{folder_path}") / f"*{dataset}*.yaml")
+    req_path = str(Path(f"{folder_path}") / f"*.txt")
    return glob.glob(yaml_path)[0], glob.glob(req_path)[0]


@@ -152,6 +144,10 @@ def get_all_results(folders) -> dict:
        result["annualized_return_with_cost"] = list()
        result["information_ratio_with_cost"] = list()
        result["max_drawdown_with_cost"] = list()
+        result["ic"] = list()
+        result["icir"] = list()
+        result["rank_ic"] = list()
+        result["rank_icir"] = list()
        for recorder_id in recorders:
            if recorders[recorder_id].status == "FINISHED":
                recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
@@ -159,19 +155,27 @@ def get_all_results(folders) -> dict:
                result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
                result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
                result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
+                result["ic"].append(metrics["IC"])
+                result["icir"].append(metrics["ICIR"])
+                result["rank_ic"].append(metrics["Rank IC"])
+                result["rank_icir"].append(metrics["Rank ICIR"])
        results[fn] = result
    return results


 # function to generate and save markdown table
-def gen_and_save_md_table(metrics):
-    table = "| Model Name | Annualized Return | Information Ratio | Max Drawdown |\n"
-    table += "|---|---|---|---|\n"
+def gen_and_save_md_table(metrics, dataset):
+    table = "| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |\n"
+    table += "|---|---|---|---|---|---|---|---|---|\n"
    for fn in metrics:
+        ic = metrics[fn]["ic"]
+        icir = metrics[fn]["icir"]
+        ric = metrics[fn]["rank_ic"]
+        ricir = metrics[fn]["rank_icir"]
        ar = metrics[fn]["annualized_return_with_cost"]
        ir = metrics[fn]["information_ratio_with_cost"]
        md = metrics[fn]["max_drawdown_with_cost"]
-        table += f"| {fn} | {ar[0]:9.4f}±{ar[1]:9.2f} | {ir[0]:9.4f}±{ir[1]:9.2f}| {md[0]:9.4f}±{md[1]:9.2f} |\n"
+        table += f"| {fn} | {dataset} | {ic[0]:5.4f}±{ic[1]:2.2f} | {icir[0]:5.4f}±{icir[1]:2.2f}| {ric[0]:5.4f}±{ric[1]:2.2f} | {ricir[0]:5.4f}±{ricir[1]:2.2f} | {ar[0]:5.4f}±{ar[1]:2.2f} | {ir[0]:5.4f}±{ir[1]:2.2f}| {md[0]:5.4f}±{md[1]:2.2f} |\n"
    pprint(table)
    with open("table.md", "w") as f:
        f.write(table)
@@ -180,10 +184,11 @@ def gen_and_save_md_table(metrics):

 # function to run the all the models
@only_allow_defined_args
-def run(times=1, models=None, exclude=False):
+def run(times=1, models=None, dataset="Alpha360", exclude=False):
    """
    Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
-    Any PR to enhance this method is highly welcomed.
+    Any PR to enhance this method is highly welcomed. Besides, this script doesn't support parrallel running the same model
+    for multiple times, and this will be fixed in the future development.

    Parameters:
    -----------
@@ -193,6 +198,8 @@ def run(times=1, models=None, exclude=False):
        determines the specific model or list of models to run or exclude.
    exclude : boolean
        determines whether the model being used is excluded or included.
+    dataset : str
+        determines the dataset to be used for each model.

    Usage:
    -------
@@ -206,13 +213,16 @@ def run(times=1, models=None, exclude=False):
        # Case 2 - run specific models multiple times
        python run_all_model.py 3 mlp

-        # Case 3 - run other models except those are given as arguments for multiple times
-        python run_all_model.py 3 [mlp,tft,lstm] True
+        # Case 3 - run specific models multiple times with specific dataset
+        python run_all_model.py 3 mlp Alpha158

-        # Case 4 - run specific models for one time
+        # Case 4 - run all models except those given as arguments, multiple times
+        python run_all_model.py 3 [mlp,tft,lstm] --exclude=True
+
+        # Case 5 - run specific models for one time
        python run_all_model.py --models=[mlp,lightgbm]

-        # Case 5 - run other models except those are given as aruments for one time
+        # Case 6 - run all models except those given as arguments, one time
        python run_all_model.py --models=[mlp,tft,sfm] --exclude=True

    """
@@ -226,7 +236,7 @@ def run(times=1, models=None, exclude=False):
        env_path, python_path, conda_activate = create_env()
        # get all files
        sys.stderr.write("Retrieving files...\n")
-        yaml_path, req_path = get_all_files(folders[fn])
+        yaml_path, req_path = get_all_files(folders[fn], dataset)
        sys.stderr.write("\n")
        # install requirements.txt
        sys.stderr.write("Installing requirements.txt...\n")
@@ -240,6 +250,7 @@ def run(times=1, models=None, exclude=False):
            sys.stderr.write("\n")
        # install qlib
        sys.stderr.write("Installing qlib...\n")
+        execute(f"{python_path} -m pip install --upgrade pip")  # TODO: FIX ME!
        execute(f"{python_path} -m pip install --upgrade cython")  # TODO: FIX ME!
        if fn == "TFT":
            execute(
@@ -272,12 +283,15 @@ def run(times=1, models=None, exclude=False):
    results = cal_mean_std(results)
    # generating md table
    sys.stderr.write(f"Generating markdown table...\n")
-    gen_and_save_md_table(results)
+    gen_and_save_md_table(results, dataset)
    sys.stderr.write("\n")
    # print erros
    sys.stderr.write(f"Here are some of the errors of the models...\n")
    pprint(errors)
    sys.stderr.write("\n")
+    # move results folder
+    shutil.move(exp_path, exp_path + f"_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}")
+    shutil.move("table.md", f"table_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}.md")


 if __name__ == "__main__":
--- a/examples/workflow_by_code.ipynb
+++ b/examples/workflow_by_code.ipynb
@@ -28,11 +28,17 @@
    "import sys, site\n",
    "from pathlib import Path\n",
    "\n",
+    "################################# NOTE #################################\n",
+    "#  Please be aware that if colab installs the latest numpy and pyqlib  #\n",
+    "#  in this cell, users should RESTART the runtime in order to run the  #\n",
+    "#  following cells successfully.                                       #\n",
+    "########################################################################\n",
    "\n",
    "try:\n",
    "    import qlib\n",
    "except ImportError:\n",
    "    # install qlib\n",
+    "    ! pip install --upgrade numpy\n",
    "    ! pip install pyqlib\n",
    "    # reload\n",
    "    site.main()\n",
@@ -238,9 +244,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "from qlib.contrib.report import analysis_model, analysis_position\n",
@@ -359,7 +363,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.9"
+   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
@@ -377,4 +381,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
--- a/examples/workflow_by_code.py
+++ b/examples/workflow_by_code.py
@@ -1,85 +1,22 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.

-import sys
-from pathlib import Path
-
 import qlib
-import pandas as pd
 from qlib.config import REG_CN
-from qlib.contrib.model.gbdt import LGBModel
-from qlib.contrib.data.handler import Alpha158
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
+from qlib.utils import init_instance_by_config, flatten_dict
 from qlib.workflow import R
 from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
+from qlib.tests.data import GetData
+from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK


 if __name__ == "__main__":

    # use default data
    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
-
+    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    qlib.init(provider_uri=provider_uri, region=REG_CN)

-    market = "csi300"
-    benchmark = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    data_handler_config = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": market,
-    }
-
-    task = {
-        "model": {
-            "class": "LGBModel",
-            "module_path": "qlib.contrib.model.gbdt",
-            "kwargs": {
-                "loss": "mse",
-                "colsample_bytree": 0.8879,
-                "learning_rate": 0.0421,
-                "subsample": 0.8789,
-                "lambda_l1": 205.6999,
-                "lambda_l2": 580.9768,
-                "max_depth": 8,
-                "num_leaves": 210,
-                "num_threads": 20,
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "Alpha158",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": data_handler_config,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        },
-    }
-
    port_analysis_config = {
        "strategy": {
            "class": "TopkDropoutStrategy",
@@ -93,28 +30,36 @@ if __name__ == "__main__":
            "verbose": False,
            "limit_threshold": 0.095,
            "account": 100000000,
-            "benchmark": benchmark,
+            "benchmark": CSI300_BENCH,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
+            "return_order": True,
        },
    }

-    # model initiaiton
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
+    # model initialization
+    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
+    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
+
+    # NOTE: This line is optional
+    # It demonstrates that the dataset can be used standalone.
+    example_df = dataset.prepare("train")
+    print(example_df.head())

    # start exp
    with R.start(experiment_name="workflow"):
-        R.log_params(**flatten_dict(task))
+        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
+        R.save_objects(**{"params.pkl": model})

        # prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

-        # backtest
+        # backtest. If users want to use backtest based on their own prediction,
+        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        par = PortAnaRecord(recorder, port_analysis_config)
        par.generate()
--- a/qlib/init.py
+++ b/qlib/init.py
@@ -2,95 +2,54 @@
 # Licensed under the MIT License.


-__version__ = "0.6.0"
+__version__ = "0.6.3.99"
+__version__bak = __version__  # This version is backup for QlibConfig.reset_qlib_version
+

 import os
-import re
-import sys
-import copy
 import yaml
 import logging
 import platform
 import subprocess
 from pathlib import Path
+from .log import get_module_logger

-from .utils import can_use_cache, init_instance_by_config, get_module_by_module_path
-from .workflow.utils import experiment_exit_handler

 # init qlib
 def init(default_conf="client", **kwargs):
-    from .config import C, REG_CN, REG_US, QlibConfig
-    from .data.data import register_all_wrappers
-    from .log import get_module_logger, set_log_with_config
+    from .config import C
    from .data.cache import H
-    from .workflow import R, QlibRecorder

-    C.reset()
    H.clear()

-    _logging_config = C.logging_config
-    if "logging_config" in kwargs:
-        _logging_config = kwargs["logging_config"]
-
-    # set global config
-    if _logging_config:
-        set_log_with_config(_logging_config)
-
    # FIXME: this logger ignored the level in config
-    LOG = get_module_logger("Initialization", level=logging.INFO)
-    LOG.info(f"default_conf: {default_conf}.")
+    logger = get_module_logger("Initialization", level=logging.INFO)

-    C.set_mode(default_conf)
-    C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN))
-
-    for k, v in kwargs.items():
-        C[k] = v
-        if k not in C:
-            LOG.warning("Unrecognized config %s" % k)
-
-    C.resolve_path()
-
-    if not (C["expression_cache"] is None and C["dataset_cache"] is None):
-        # check redis
-        if not can_use_cache():
-            LOG.warning(
-                f"redis connection failed(host={C['redis_host']} port={C['redis_port']}), cache will not be used!"
-            )
-            C["expression_cache"] = None
-            C["dataset_cache"] = None
+    C.set(default_conf, **kwargs)

    # check path if server/local
-    if C.get_uri_type() == QlibConfig.LOCAL_URI:
+    if C.get_uri_type() == C.LOCAL_URI:
        if not os.path.exists(C["provider_uri"]):
            if C["auto_mount"]:
-                LOG.error(
+                logger.error(
                    f"Invalid provider uri: {C['provider_uri']}, please check if a valid provider uri has been set. This path does not exist."
                )
            else:
-                LOG.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
-    elif C.get_uri_type() == QlibConfig.NFS_URI:
+                logger.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
+    elif C.get_uri_type() == C.NFS_URI:
        _mount_nfs_uri(C)
    else:
        raise NotImplementedError(f"This type of URI is not supported")

-    LOG.info("qlib successfully initialized based on %s settings." % default_conf)
-    register_all_wrappers()
-
-    LOG.info(f"data_path={C.get_data_path()}")
+    C.register()

    if "flask_server" in C:
-        LOG.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
-
-    # set up QlibRecorder
-    exp_manager = init_instance_by_config(C["exp_manager"])
-    qr = QlibRecorder(exp_manager)
-    R.register(qr)
-    # clean up experiment when python program ends
-    experiment_exit_handler()
+        logger.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
+    logger.info("qlib successfully initialized based on %s settings." % default_conf)
+    logger.info(f"data_path={C.get_data_path()}")


 def _mount_nfs_uri(C):
-    from .log import get_module_logger

    LOG = get_module_logger("mount nfs", level=logging.INFO)

@@ -189,7 +148,78 @@ def init_from_yaml_conf(conf_path, **kwargs):
    """

    with open(conf_path) as f:
-        config = yaml.load(f, Loader=yaml.FullLoader)
+        config = yaml.safe_load(f)
    config.update(kwargs)
    default_conf = config.pop("default_conf", "client")
    init(default_conf, **config)
+
+
+def get_project_path(config_name="config.yaml", cur_path=None) -> Path:
+    """
+    Locate the root directory of a project that embeds qlib.
+
+    The search starts at `cur_path` and walks up through its parents; the first
+    directory that contains a file named `config_name` is returned.
+
+    This is meant for projects that follow the following pattern.
+    - Qlib is a sub folder in project path
+    - There is a file named `config.yaml` in the project path.
+
+    For example:
+        If your project file system structure follows such a pattern
+
+            <project_path>/
+              - config.yaml
+              - ...some folders...
+                - qlib/
+
+        This function will return <project_path>
+
+        NOTE: link is not supported here.
+
+
+    This method is often used when
+    - user want to use a relative config path instead of hard-coding qlib config path in code
+
+    Parameters
+    ----------
+    config_name : str
+        name of the file that marks the project path.
+    cur_path : str or Path, optional
+        where the search starts; the location of this file is used when it is None.
+
+    Returns
+    -------
+    Path
+        the first path, going upwards from `cur_path`, that contains `config_name`.
+
+    Raises
+    ------
+    FileNotFoundError:
+        If project path is not found
+    """
+    if cur_path is None:
+        cur_path = Path(__file__).absolute().resolve()
+    else:
+        # accept plain strings as well: the `/` operator below needs a Path
+        cur_path = Path(cur_path)
+    while True:
+        if (cur_path / config_name).exists():
+            return cur_path
+        # a path that equals its own parent cannot be walked up any further
+        if cur_path == cur_path.parent:
+            raise FileNotFoundError("We can't find the project path")
+        cur_path = cur_path.parent
+
+
+def auto_init(**kwargs):
+    """
+    This function will init qlib automatically with following priority
+    - Find the project configuration and init qlib
+        - The parsing process will be affected by the `conf_type` of the configuration file
+    - Init qlib with default config
+
+    Parameters
+    ----------
+    **kwargs :
+        `cur_path` (optional) is removed from kwargs and used as the starting point of the
+        project path search; the remaining items are forwarded to `init` / `init_from_yaml_conf`.
+
+    Raises
+    ------
+    ValueError:
+        If the `conf_type` of the project configuration is neither "origin" nor "ref"
+    """
+
+    try:
+        pp = get_project_path(cur_path=kwargs.pop("cur_path", None))
+    except FileNotFoundError:
+        # no project configuration can be found: init qlib with the default config
+        init(**kwargs)
+    else:
+
+        conf_pp = pp / "config.yaml"
+        with conf_pp.open() as f:
+            conf = yaml.safe_load(f)
+
+        conf_type = conf.get("conf_type", "origin")
+        if conf_type == "origin":
+            # The type of config is just like original qlib config
+            init_from_yaml_conf(conf_pp, **kwargs)
+        elif conf_type == "ref":
+            # This config type will be more convenient in following scenario
+            # - There is a shared configure file and you don't want to edit it inplace.
+            # - The shared configure may be updated later and you don't want to copy it.
+            # - You have some customized config.
+            qlib_conf_path = conf["qlib_cfg"]
+            # `qlib_cfg_update` is optional: a missing or empty entry yields None, which can't be unpacked.
+            # The two sources are merged explicitly because a key present in both would otherwise raise
+            # TypeError for a duplicated keyword argument; kwargs take precedence.
+            qlib_conf_update = dict(conf.get("qlib_cfg_update") or {})
+            qlib_conf_update.update(kwargs)
+            init_from_yaml_conf(qlib_conf_path, **qlib_conf_update)
+        else:
+            # don't report a successful load when nothing has been initialized
+            raise ValueError(f"Unsupported conf_type: {conf_type}")
+        logger = get_module_logger("Initialization")
+        logger.info(f"Auto load project config: {conf_pp}")
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -11,26 +11,30 @@ Two modes are supported

 """

-import copy
-from pathlib import Path
-import re
 import os
+import re
+import copy
+import logging
 import multiprocessing
+from pathlib import Path


 class Config:
    def __init__(self, default_conf):
-        self.__dict__["_default_config"] = default_conf  # avoiding conflictions with __getattr__
+        self.__dict__["_default_config"] = copy.deepcopy(default_conf)  # avoiding conflictions with __getattr__
        self.reset()

    def __getitem__(self, key):
        return self.__dict__["_config"][key]

    def __getattr__(self, attr):
-        try:
+        if attr in self.__dict__["_config"]:
            return self.__dict__["_config"][attr]
-        except KeyError:
-            return AttributeError(f"No such {attr} in self._config")
+
+        raise AttributeError(f"No such {attr} in self._config")
+
+    def get(self, key, default=None):
+        return self.__dict__["_config"].get(key, default)

    def __setitem__(self, key, value):
        self.__dict__["_config"][key] = value
@@ -59,6 +63,9 @@ class Config:
    def update(self, *args, **kwargs):
        self.__dict__["_config"].update(*args, **kwargs)

+    def set_conf_from_C(self, config_c):
+        self.update(**config_c.__dict__["_config"])
+

 # REGION CONST
 REG_CN = "cn"
@@ -86,7 +93,6 @@ _default_config = {
    # How many tasks belong to one process. Recommend 1 for high-frequency data and None for daily data.
    "maxtasksperchild": None,
    "default_disk_cache": 1,  # 0:skip/1:use
-    "disable_disk_cache": False,  # disable disk cache; if High-frequency data generally disable_disk_cache=True
    "mem_cache_size_limit": 500,
    # memory cache expire second, only in used 'DatasetURICache' and 'client D.calendar'
    # default 1 hour
@@ -102,7 +108,7 @@ _default_config = {
    "redis_port": 6379,
    "redis_task_db": 1,
    # This value can be reset via qlib.init
-    "logging_level": "INFO",
+    "logging_level": logging.INFO,
    # Global configuration of qlib log
    # logging_level can control the logging level more finely
    "logging_config": {
@@ -121,14 +127,14 @@ _default_config = {
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
-                "level": "DEBUG",
+                "level": logging.DEBUG,
                "formatter": "logger_format",
                "filters": ["field_not_found"],
            }
        },
-        "loggers": {"qlib": {"level": "DEBUG", "handlers": ["console"]}},
+        "loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"]}},
    },
-    # Defatult config for experiment manager
+    # Default config for experiment manager
    "exp_manager": {
        "class": "MLflowExpManager",
        "module_path": "qlib.workflow.expm",
@@ -137,6 +143,11 @@ _default_config = {
            "default_exp_name": "Experiment",
        },
    },
+    # Default config for MongoDB
+    "mongo": {
+        "task_url": "mongodb://localhost:27017/",
+        "task_db_name": "default_task_db",
+    },
 }

 MODE_CONF = {
@@ -182,11 +193,19 @@ MODE_CONF = {
        # The nfs should be auto-mounted by qlib on other
        # serversS(such as PAI) [auto_mount:True]
        "timeout": 100,
-        "logging_level": "INFO",
+        "logging_level": logging.INFO,
        "region": REG_CN,
+        ## Custom Operator
+        "custom_ops": [],
    },
 }

+HIGH_FREQ_CONFIG = {
+    "provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min",
+    "dataset_cache": None,
+    "expression_cache": "DiskExpressionCache",
+    "region": REG_CN,
+}

 _default_region_config = {
    REG_CN: {
@@ -207,6 +226,10 @@ class QlibConfig(Config):
    LOCAL_URI = "local"
    NFS_URI = "nfs"

+    def __init__(self, default_conf):
+        # let the base class build the config from `default_conf`
+        super().__init__(default_conf)
+        # nothing has been registered yet; `register` switches this to True
+        self._registered = False
+
    def set_mode(self, mode):
        # raise KeyError
        self.update(MODE_CONF[mode])
@@ -243,6 +266,78 @@ class QlibConfig(Config):
        else:
            raise NotImplementedError(f"This type of uri is not supported")

+    def set(self, default_conf="client", **kwargs):
+        """
+        Reset this config and rebuild it from a mode name plus user overrides.
+
+        Parameters
+        ----------
+        default_conf : str
+            the mode handed to `set_mode`; "client" by default.
+        **kwargs :
+            config items that override the values of the selected mode.
+            `logging_config` and `region` are read before the remaining items are applied.
+            A key that is not part of the config yet is still set, but a warning is logged for it.
+        """
+        from .utils import set_log_with_config, get_module_logger, can_use_cache
+
+        # start from a clean state so that values of a previous call don't leak into this one
+        self.reset()
+
+        # a logging config passed by the caller wins over the one stored in this config
+        _logging_config = self.logging_config
+        if "logging_config" in kwargs:
+            _logging_config = kwargs["logging_config"]
+
+        # set global config
+        if _logging_config:
+            set_log_with_config(_logging_config)
+
+        # FIXME: this logger ignored the level in config
+        logger = get_module_logger("Initialization", level=logging.INFO)
+        logger.info(f"default_conf: {default_conf}.")
+
+        self.set_mode(default_conf)
+        # region priority: kwargs, then the value already in this config, then REG_CN
+        self.set_region(kwargs.get("region", self["region"] if "region" in self else REG_CN))
+
+        for k, v in kwargs.items():
+            # the membership test has to run before the assignment, otherwise it is always True
+            if k not in self:
+                logger.warning("Unrecognized config %s" % k)
+            self[k] = v
+
+        self.resolve_path()
+
+        # only check the cache when at least one of the two caches is configured
+        if not (self["expression_cache"] is None and self["dataset_cache"] is None):
+            # check redis
+            if not can_use_cache():
+                logger.warning(
+                    f"redis connection failed(host={self['redis_host']} port={self['redis_port']}), cache will not be used!"
+                )
+                # disable both caches instead of failing
+                self["expression_cache"] = None
+                self["dataset_cache"] = None
+
+    def register(self):
+        """
+        Register the components that depend on this config and mark the config as registered.
+
+        In order, this registers the operators and the data wrappers with this config, builds the
+        experiment manager from the `exp_manager` item and registers a QlibRecorder on `R`,
+        installs the experiment exit handler and finally applies `reset_qlib_version`.
+        """
+        # imported here rather than at module level
+        # NOTE(review): presumably to avoid circular imports with these modules - confirm
+        from .utils import init_instance_by_config
+        from .data.ops import register_all_ops
+        from .data.data import register_all_wrappers
+        from .workflow import R, QlibRecorder
+        from .workflow.utils import experiment_exit_handler
+
+        register_all_ops(self)
+        register_all_wrappers(self)
+        # set up QlibRecorder
+        exp_manager = init_instance_by_config(self["exp_manager"])
+        qr = QlibRecorder(exp_manager)
+        R.register(qr)
+        # clean up experiment when python program ends
+        experiment_exit_handler()
+
+        # Supporting user reset qlib version (useful when user want to connect to qlib server with old version)
+        self.reset_qlib_version()
+
+        self._registered = True
+
+    def reset_qlib_version(self):
+        """
+        Set `qlib.__version__` to the `qlib_reset_version` item of this config, or restore the
+        backup stored in `qlib.__version__bak` when that item is missing or None.
+        """
+        import qlib
+
+        reset_version = self.get("qlib_reset_version", None)
+        if reset_version is not None:
+            qlib.__version__ = reset_version
+        else:
+            qlib.__version__ = getattr(qlib, "__version__bak")
+            # The backup is looked up by its string name on purpose: inside a class body Python
+            # name-mangles `qlib.__version__bak` (two leading underscores, no two trailing ones)
+            # into `qlib._QlibConfig__version__bak`, which does not exist.
+
+    @property
+    def registered(self):
+        """Return the `_registered` flag, which `register` sets to True once it has finished."""
+        return self._registered
+

 # global config
 C = QlibConfig(_default_config)
--- a/qlib/contrib/backtest/init.py
+++ b/qlib/contrib/backtest/init.py
@@ -1,9 +1,324 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

-# -*- coding: utf-8 -*-
 from .order import Order
 from .account import Account
 from .position import Position
 from .exchange import Exchange
 from .report import Report
+from .backtest import backtest as backtest_func, get_date_range
+
+import numpy as np
+import inspect
+from ...utils import init_instance_by_config
+from ...log import get_module_logger
+from ...config import C
+
+logger = get_module_logger("backtest caller")
+
+
+def get_strategy(
+    strategy=None,
+    topk=50,
+    margin=0.5,
+    n_drop=5,
+    risk_degree=0.95,
+    str_type="dropout",
+    adjust_dates=None,
+):
+    """get_strategy
+
+    There will be 3 ways to return a strategy. Please follow the code.
+
+
+    Parameters
+    ----------
+
+    strategy : Strategy(), dict, str or None
+        strategy used in backtest. None creates a new strategy from the parameters below;
+        a dict or str is handed to `init_instance_by_config`; anything else is used as it is.
+    topk : int (Default value: 50)
+        top-N stocks to buy.
+    margin : int or float(Default value: 0.5)
+        - if isinstance(margin, int):
+
+            sell_limit = margin
+
+        - else:
+
+            sell_limit = pred_in_a_day.count() * margin
+
+        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
+        sell_limit should be no less than topk.
+    n_drop : int
+        number of stocks to be replaced in each trading date.
+    risk_degree: float
+        0-1, 0.95 for example, use 95% money to trade.
+    str_type: 'amount', 'weight' or 'dropout'
+        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
+    adjust_dates :
+        passed to the strategy class as `adjust_dates` when a new strategy is created.
+
+    Returns
+    -------
+    :class: Strategy
+    an initialized strategy object
+
+    Raises
+    ------
+    ValueError
+        if `strategy` is None and `str_type` is not one of the supported strategy types.
+    TypeError
+        if the resulting object is not an instance of BaseStrategy.
+    """
+
+    # There  will be 3 ways to return a strategy.
+    if strategy is None:
+        # 1) no strategy is given: create one from `str_type` and the other parameters
+        str_cls_dict = {
+            "amount": "TopkAmountStrategy",
+            "weight": "TopkWeightStrategy",
+            "dropout": "TopkDropoutStrategy",
+        }
+        # fail with a clear message instead of letting getattr choke on a None attribute name
+        if str_type not in str_cls_dict:
+            raise ValueError(f"Unsupported str_type: {str_type!r}, it should be one of {sorted(str_cls_dict)}")
+        logger.info("Create new strategy ")
+        from .. import strategy as strategy_pool
+
+        str_cls = getattr(strategy_pool, str_cls_dict[str_type])
+        strategy = str_cls(
+            topk=topk,
+            buffer_margin=margin,
+            n_drop=n_drop,
+            risk_degree=risk_degree,
+            adjust_dates=adjust_dates,
+        )
+    elif isinstance(strategy, (dict, str)):
+        # 2) create strategy with init_instance_by_config
+        logger.info("Create new strategy ")
+        strategy = init_instance_by_config(strategy)
+
+    from ..strategy.strategy import BaseStrategy
+
+    # 3) otherwise use the given strategy directly; in every case the result must be a BaseStrategy
+    if not isinstance(strategy, BaseStrategy):
+        raise TypeError("Strategy not supported")
+    return strategy
+
+
+def get_exchange(
+    pred,
+    exchange=None,
+    subscribe_fields=None,
+    open_cost=0.0015,
+    close_cost=0.0025,
+    min_cost=5.0,
+    trade_unit=None,
+    limit_threshold=None,
+    deal_price=None,
+    extract_codes=False,
+    shift=1,
+):
+    """get_exchange
+
+    Return `exchange` as it is when one is given, otherwise build a new Exchange from `pred`
+    and the remaining arguments.
+
+    Parameters
+    ----------
+
+    pred : pandas.DataFrame
+        prediction whose index has the levels `datetime` and `instrument`; the trade dates
+        (and, with `extract_codes`, the codes) of a new exchange are taken from it.
+
+    # exchange related arguments
+    exchange: Exchange().
+        if it is not None, it is returned unchanged.
+    subscribe_fields: list
+        subscribe fields. None (the default) means an empty list.
+    open_cost : float
+        open transaction cost.
+    close_cost : float
+        close transaction cost.
+    min_cost : float
+        min transaction cost.
+    trade_unit : int
+        100 for China A. `C.trade_unit` is used when it is None.
+    deal_price: str
+        dealing price type: 'close', 'open', 'vwap'. `C.deal_price` is used when it is None;
+        a leading `$` is added when it is missing.
+    limit_threshold : float
+        limit move 0.1 (10%) for example, long and short with same limit.
+        `C.limit_threshold` is used when it is None.
+    extract_codes: bool
+        will we pass the codes extracted from the pred to the exchange.
+        NOTE: This will be faster with offline qlib.
+    shift : int
+        passed as `right_shift` to `get_date_range` when the dates after the last prediction
+        date are appended to the trade dates.
+
+    Returns
+    -------
+    :class: Exchange
+    an initialized Exchange object
+    """
+
+    # a fresh list per call: a list literal as default value would be shared between all calls
+    if subscribe_fields is None:
+        subscribe_fields = []
+    if trade_unit is None:
+        trade_unit = C.trade_unit
+    if limit_threshold is None:
+        limit_threshold = C.limit_threshold
+    if deal_price is None:
+        deal_price = C.deal_price
+    if exchange is None:
+        logger.info("Create new exchange")
+        # handle exception for deal_price
+        if deal_price[0] != "$":
+            deal_price = "$" + deal_price
+        if extract_codes:
+            codes = sorted(pred.index.get_level_values("instrument").unique())
+        else:
+            codes = "all"  # TODO: We must ensure that 'all.txt' includes all the stocks
+
+        # the prediction dates plus the dates following the last one
+        dates = sorted(pred.index.get_level_values("datetime").unique())
+        dates = np.append(dates, get_date_range(dates[-1], left_shift=1, right_shift=shift))
+
+        exchange = Exchange(
+            trade_dates=dates,
+            codes=codes,
+            deal_price=deal_price,
+            subscribe_fields=subscribe_fields,
+            limit_threshold=limit_threshold,
+            open_cost=open_cost,
+            close_cost=close_cost,
+            min_cost=min_cost,
+            trade_unit=trade_unit,
+        )
+    return exchange
+
+
+def get_executor(
+    executor=None,
+    trade_exchange=None,
+    verbose=True,
+):
+    """get_executor
+
+    There will be 3 ways to return an executor. Please follow the code.
+
+    Parameters
+    ----------
+
+    executor : BaseExecutor, dict, str or None
+        executor used in backtest. None creates a new SimulatorExecutor; a dict or str is handed
+        to `init_instance_by_config`; anything else is used as it is.
+    trade_exchange : Exchange
+        exchange used in executor. Only used when a new SimulatorExecutor is created.
+    verbose : bool
+        whether to print log. Only used when a new SimulatorExecutor is created.
+
+    Returns
+    -------
+    :class: BaseExecutor
+    an initialized BaseExecutor object
+
+    Raises
+    ------
+    TypeError
+        if the resulting object is not an instance of BaseExecutor.
+    """
+
+    # There will be 3 ways to return an executor.
+    if executor is None:
+        # 1) no executor is given: create a SimulatorExecutor on top of `trade_exchange`
+        logger.info("Create new executor ")
+        from ..online.executor import SimulatorExecutor
+
+        executor = SimulatorExecutor(trade_exchange=trade_exchange, verbose=verbose)
+    elif isinstance(executor, (dict, str)):
+        # 2) create executor with config
+        logger.info("Create new executor ")
+        executor = init_instance_by_config(executor)
+
+    from ..online.executor import BaseExecutor
+
+    # 3) otherwise use the given executor directly; in every case the result must be a BaseExecutor
+    if not isinstance(executor, BaseExecutor):
+        raise TypeError("Executor not supported")
+    return executor
+
+
+# This is the API for compatibility for legacy code
+def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, return_order=False, **kwargs):
+    """This function will help you set a reasonable Exchange and provide default value for strategy
+
+    The items of `kwargs` are dispatched by name: those matching a parameter of `get_strategy`
+    go to the strategy, those matching a parameter of `get_exchange` go to the exchange, and
+    `executor` goes to `get_executor`.
+
+    Parameters
+    ----------
+
+    - **backtest workflow related or common arguments**
+
+    pred : pandas.DataFrame
+        predict should has <datetime, instrument> index and one `score` column.
+    account : float
+        init account value.
+    shift : int
+        whether to shift prediction by one day. It is passed to both the exchange and the backtest.
+    benchmark : str
+        benchmark code, default is SH000905 CSI 500.
+    verbose : bool
+        whether to print log. It is passed to both the executor and the backtest.
+    return_order : bool
+        whether to return order list
+
+    - **strategy related arguments**
+
+    strategy : Strategy()
+        strategy used in backtest.
+    topk : int (Default value: 50)
+        top-N stocks to buy.
+    margin : int or float(Default value: 0.5)
+        - if isinstance(margin, int):
+
+            sell_limit = margin
+
+        - else:
+
+            sell_limit = pred_in_a_day.count() * margin
+
+        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
+        sell_limit should be no less than topk.
+    n_drop : int
+        number of stocks to be replaced in each trading date.
+    risk_degree: float
+        0-1, 0.95 for example, use 95% money to trade.
+    str_type: 'amount', 'weight' or 'dropout'
+        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
+
+    - **exchange related arguments**
+
+    exchange: Exchange()
+        pass the exchange for speeding up.
+    subscribe_fields: list
+        subscribe fields.
+    open_cost : float
+        open transaction cost. The default value is 0.0015(0.15%).
+    close_cost : float
+        close transaction cost. The default value is 0.0025(0.25%).
+    min_cost : float
+        min transaction cost.
+    trade_unit : int
+        100 for China A.
+    deal_price: str
+        dealing price type: 'close', 'open', 'vwap'.
+    limit_threshold : float
+        limit move 0.1 (10%) for example, long and short with same limit.
+    extract_codes: bool
+        will we pass the codes extracted from the pred to the exchange.
+
+        .. note:: This will be faster with offline qlib.
+
+    - **executor related arguments**
+
+    executor : BaseExecutor()
+        executor used in backtest.
+
+    Returns
+    -------
+    dict
+        the dict returned by the backtest run, in which the value of `positions` is replaced by
+        a dict that maps each key to the `position` attribute of the original entry.
+    """
+    # check strategy:
+    spec = inspect.getfullargspec(get_strategy)
+    str_args = {k: v for k, v in kwargs.items() if k in spec.args}
+    strategy = get_strategy(**str_args)
+
+    # init exchange:
+    spec = inspect.getfullargspec(get_exchange)
+    ex_args = {k: v for k, v in kwargs.items() if k in spec.args}
+    # `shift` is a named parameter of this function, so it never shows up in kwargs.
+    # Pass it explicitly; otherwise the exchange always falls back to its own default shift.
+    trade_exchange = get_exchange(pred, shift=shift, **ex_args)
+
+    # init executor:
+    executor = get_executor(executor=kwargs.get("executor"), trade_exchange=trade_exchange, verbose=verbose)
+
+    # run backtest
+    report_dict = backtest_func(
+        pred=pred,
+        strategy=strategy,
+        executor=executor,
+        trade_exchange=trade_exchange,
+        shift=shift,
+        verbose=verbose,
+        account=account,
+        benchmark=benchmark,
+        return_order=return_order,
+    )
+    # for  compatibility of the old API. return the dict positions
+
+    positions = report_dict.get("positions")
+    report_dict.update({"positions": {k: p.position for k, p in positions.items()}})
+    return report_dict
--- a/qlib/contrib/backtest/account.py
+++ b/qlib/contrib/backtest/account.py
@@ -104,10 +104,9 @@ class Account:
            # if suspend, no new price to be updated, profit is 0
            if trader.check_stock_suspended(code, today):
                continue
-            else:
-                today_close = trader.get_close(code, today)
-                profit += (today_close - self.current.position[code]["price"]) * self.current.position[code]["amount"]
-                self.current.update_stock_price(stock_id=code, price=today_close)
+            today_close = trader.get_close(code, today)
+            profit += (today_close - self.current.position[code]["price"]) * self.current.position[code]["amount"]
+            self.current.update_stock_price(stock_id=code, price=today_close)
        self.rtn += profit
        # update holding day count
        self.current.add_count_all()
--- a/qlib/contrib/backtest/backtest.py
+++ b/qlib/contrib/backtest/backtest.py
@@ -5,7 +5,6 @@
 import numpy as np
 import pandas as pd
 from ...utils import get_date_by_shift, get_date_range
-from ..online.executor import SimulatorExecutor
 from ...data import D
 from .account import Account
 from ...config import C
@@ -15,8 +14,9 @@ from ...data.dataset.utils import get_level_index
 LOG = get_module_logger("backtest")


-def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark):
-    """Parameters
+def backtest(pred, strategy, executor, trade_exchange, shift, verbose, account, benchmark, return_order):
+    """
+    Parameters
    ----------
    pred : pandas.DataFrame
        predict should has <datetime, instrument> index and one `score` column
@@ -69,9 +69,9 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
            raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark")
        bench = _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean()

-    trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift))
-    executor = SimulatorExecutor(trade_exchange, verbose=verbose)
-
+    trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))
+    if return_order:
+        multi_order_list = []
    # trading apart
    for pred_date, trade_date in zip(predict_dates, trade_dates):
        # for loop predict date and trading date
@@ -103,6 +103,8 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
            )
        else:
            order_list = []
+        if return_order:
+            multi_order_list.append((trade_account, order_list, trade_date))
        # 4. Get result after executing order list
        # NOTE: The following operation will modify order.amount.
        # NOTE: If it is buy and the cash is insufficient, the tradable amount will be recalculated
@@ -115,11 +117,17 @@ def backtest(pred, strategy, trade_exchange, shift, verbose, account, benchmark)
    report_df = trade_account.report.generate_report_dataframe()
    report_df["bench"] = bench
    positions = trade_account.get_positions()
-    return report_df, positions
+
+    report_dict = {"report_df": report_df, "positions": positions}
+    if return_order:
+        report_dict.update({"order_list": multi_order_list})
+    return report_dict


 def update_account(trade_account, trade_info, trade_exchange, trade_date):
-    """Update the account and strategy
+    """
+    Update the account and strategy
+
    Parameters
    ----------
    trade_account : Account()
--- a/qlib/contrib/backtest/position.py
+++ b/qlib/contrib/backtest/position.py
@@ -1,10 +1,10 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

-
-import pandas as pd
 import copy
 import pathlib
+import pandas as pd
+import numpy as np
 from .order import Order

 """
@@ -128,7 +128,7 @@ class Position:
        return self.position["cash"]

    def get_stock_amount_dict(self):
-        """generate stock amount dict {stock_id : amount of stock} """
+        """generate stock amount dict {stock_id : amount of stock}"""
        d = {}
        stock_list = self.get_stock_list()
        for stock_code in stock_list:
@@ -166,7 +166,7 @@ class Position:
    def save_position(self, path, last_trade_date):
        path = pathlib.Path(path)
        p = copy.deepcopy(self.position)
-        cash = pd.Series(dtype=np.float)
+        cash = pd.Series(dtype=float)
        cash["init_cash"] = self.init_cash
        cash["cash"] = p["cash"]
        cash["today_account_value"] = p["today_account_value"]
--- a/qlib/contrib/data/handler.py
+++ b/qlib/contrib/data/handler.py
@@ -26,6 +26,7 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time):
                        "fit_end_time": fit_end_time,
                    }
                )
+            # FIXME: the `module_path` parameter is missed.
            new_l.append({"class": klass.__name__, "kwargs": pkwargs})
        else:
            new_l.append(p)
@@ -43,16 +44,18 @@ _DEFAULT_INFER_PROCESSORS = [
 ]


-class ALPHA360(DataHandlerLP):
+class Alpha360(DataHandlerLP):
    def __init__(
        self,
        instruments="csi500",
        start_time=None,
        end_time=None,
+        freq="day",
        infer_processors=_DEFAULT_INFER_PROCESSORS,
        learn_processors=_DEFAULT_LEARN_PROCESSORS,
        fit_start_time=None,
        fit_end_time=None,
+        filter_pipe=None,
        **kwargs,
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
@@ -65,13 +68,15 @@ class ALPHA360(DataHandlerLP):
                    "feature": self.get_feature_config(),
                    "label": kwargs.get("label", self.get_label_config()),
                },
+                "filter_pipe": filter_pipe,
+                "freq": freq,
            },
        }

        super().__init__(
-            instruments,
-            start_time,
-            end_time,
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
            data_loader=data_loader,
            learn_processors=learn_processors,
            infer_processors=infer_processors,
@@ -119,7 +124,7 @@ class ALPHA360(DataHandlerLP):
        return fields, names


-class ALPHA360vwap(ALPHA360):
+class Alpha360vwap(Alpha360):
    def get_label_config(self):
        return (["Ref($vwap, -2)/Ref($vwap, -1) - 1"], ["LABEL0"])

@@ -130,11 +135,13 @@ class Alpha158(DataHandlerLP):
        instruments="csi500",
        start_time=None,
        end_time=None,
+        freq="day",
        infer_processors=[],
        learn_processors=_DEFAULT_LEARN_PROCESSORS,
        fit_start_time=None,
        fit_end_time=None,
        process_type=DataHandlerLP.PTYPE_A,
+        filter_pipe=None,
        **kwargs,
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
@@ -143,13 +150,18 @@ class Alpha158(DataHandlerLP):
        data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
-                "config": {"feature": self.get_feature_config(), "label": kwargs.get("label", self.get_label_config())},
+                "config": {
+                    "feature": self.get_feature_config(),
+                    "label": kwargs.get("label", self.get_label_config()),
+                },
+                "filter_pipe": filter_pipe,
+                "freq": freq,
            },
        }
        super().__init__(
-            instruments,
-            start_time,
-            end_time,
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
            data_loader=data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
--- a/qlib/contrib/eva/alpha.py
+++ b/qlib/contrib/eva/alpha.py
@@ -8,6 +8,59 @@ import pandas as pd
 from typing import Tuple


+def calc_long_short_prec(
+    pred: pd.Series, label: pd.Series, date_col="datetime", quantile: float = 0.2, dropna=False, is_alpha=False
+) -> Tuple[pd.Series, pd.Series]:
+    """
+    Calculate the precision of the long and short legs selected from the prediction.
+
+    For every date, the instruments with the largest ``int(n * quantile)`` predictions form the
+    long leg and the ones with the smallest predictions form the short leg (``n`` is the number of
+    rows of that date).  The long precision is the fraction of long-leg labels that are ``> 0``;
+    the short precision is the fraction of short-leg labels that are ``< 0``.
+
+    Parameters
+    ----------
+    pred : pd.Series
+        prediction; the index is a **pd.MultiIndex** named **[datetime, instrument]**.
+
+        .. code-block:: python
+
+                                                  score
+                datetime            instrument
+                2020-12-01 09:30:00 SH600068    0.553634
+                                    SH600195    0.550017
+                                    SH600276    0.540321
+                                    SH600584    0.517297
+                                    SH600715    0.544674
+    label : pd.Series
+        label, indexed in the same way as ``pred``.
+    date_col : str
+        name of the datetime level of the index.
+    quantile : float
+        fraction of instruments taken for each of the long and the short leg.
+    dropna : bool
+        whether to drop the rows in which ``pred`` or ``label`` is NaN before ranking.
+    is_alpha : bool
+        if True, the per-date mean of ``label`` is subtracted from ``label`` first.
+
+    Returns
+    -------
+    (pd.Series, pd.Series)
+        long precision and short precision in time level
+
+    Raises
+    ------
+    ValueError
+        if ``int(1 / quantile)`` is not smaller than the number of distinct instruments.
+    """
+    if is_alpha:
+        # `Series.mean(level=...)` has been removed from pandas; a group-wise transform keeps the
+        # original index, so the subtraction stays element-wise.
+        label = label - label.groupby(level=date_col).transform("mean")
+    if int(1 / quantile) >= len(label.index.get_level_values(1).unique()):
+        raise ValueError("Need more instruments to calculate precision")
+
+    df = pd.DataFrame({"pred": pred, "label": label})
+    if dropna:
+        df.dropna(inplace=True)
+
+    group = df.groupby(level=date_col)
+
+    N = lambda x: int(len(x) * quantile)
+    # find the top/low quantile of prediction and treat them as long and short target
+    # (`apply` prepends the group key as an extra outer level, which is dropped again)
+    long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label).reset_index(level=0, drop=True)
+    short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label).reset_index(level=0, drop=True)
+
+    # Compare first, then aggregate per date.  This avoids `groupby.apply` on a transform-like
+    # function, whose resulting index depends on the pandas version (`group_keys`).
+    l_hit = (long > 0).groupby(level=date_col).sum()
+    l_c = long.groupby(level=date_col).count()
+
+    s_hit = (short < 0).groupby(level=date_col).sum()
+    s_c = short.groupby(level=date_col).count()
+    return (l_hit / l_c), (s_hit / s_c)
+
+
 def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False) -> Tuple[pd.Series, pd.Series]:
    """calc_ic.

--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -6,17 +6,16 @@ from __future__ import print_function

 import numpy as np
 import pandas as pd
-import inspect
+import warnings
 from ..log import get_module_logger
-from . import strategy as strategy_pool
-from .strategy.strategy import BaseStrategy
-from .backtest.exchange import Exchange
-from .backtest.backtest import backtest as backtest_func, get_date_range
+from .backtest import get_exchange, backtest as backtest_func
+from .backtest.backtest import get_date_range

 from ..data import D
 from ..config import C
 from ..data.dataset.utils import get_level_index

+
 logger = get_module_logger("Evaluate")


@@ -46,144 +45,6 @@ def risk_analysis(r, N=252):
    return res


-def get_strategy(
-    strategy=None,
-    topk=50,
-    margin=0.5,
-    n_drop=5,
-    risk_degree=0.95,
-    str_type="amount",
-    adjust_dates=None,
-):
-    """get_strategy
-
-    Parameters
-    ----------
-
-    strategy : Strategy()
-        strategy used in backtest.
-    topk : int (Default value: 50)
-        top-N stocks to buy.
-    margin : int or float(Default value: 0.5)
-        - if isinstance(margin, int):
-
-            sell_limit = margin
-
-        - else:
-
-            sell_limit = pred_in_a_day.count() * margin
-
-        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
-        sell_limit should be no less than topk.
-    n_drop : int
-        number of stocks to be replaced in each trading date.
-    risk_degree: float
-        0-1, 0.95 for example, use 95% money to trade.
-    str_type: 'amount', 'weight' or 'dropout'
-        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
-
-    Returns
-    -------
-    :class: Strategy
-    an initialized strategy object
-    """
-    if strategy is None:
-        str_cls_dict = {
-            "amount": "TopkAmountStrategy",
-            "weight": "TopkWeightStrategy",
-            "dropout": "TopkDropoutStrategy",
-        }
-        logger.info("Create new streategy ")
-        str_cls = getattr(strategy_pool, str_cls_dict.get(str_type))
-        strategy = str_cls(
-            topk=topk,
-            buffer_margin=margin,
-            n_drop=n_drop,
-            risk_degree=risk_degree,
-            adjust_dates=adjust_dates,
-        )
-    if not isinstance(strategy, BaseStrategy):
-        raise TypeError("Strategy not supported")
-    return strategy
-
-
-def get_exchange(
-    pred,
-    exchange=None,
-    subscribe_fields=[],
-    open_cost=0.0015,
-    close_cost=0.0025,
-    min_cost=5.0,
-    trade_unit=None,
-    limit_threshold=None,
-    deal_price=None,
-    extract_codes=False,
-    shift=1,
-):
-    """get_exchange
-
-    Parameters
-    ----------
-
-    # exchange related arguments
-    exchange: Exchange().
-    subscribe_fields: list
-        subscribe fields.
-    open_cost : float
-        open transaction cost.
-    close_cost : float
-        close transaction cost.
-    min_cost : float
-        min transaction cost.
-    trade_unit : int
-        100 for China A.
-    deal_price: str
-        dealing price type: 'close', 'open', 'vwap'.
-    limit_threshold : float
-        limit move 0.1 (10%) for example, long and short with same limit.
-    extract_codes: bool
-        will we pass the codes extracted from the pred to the exchange.
-        NOTE: This will be faster with offline qlib.
-
-    Returns
-    -------
-    :class: Exchange
-    an initialized Exchange object
-    """
-
-    if trade_unit is None:
-        trade_unit = C.trade_unit
-    if limit_threshold is None:
-        limit_threshold = C.limit_threshold
-    if deal_price is None:
-        deal_price = C.deal_price
-    if exchange is None:
-        logger.info("Create new exchange")
-        # handle exception for deal_price
-        if deal_price[0] != "$":
-            deal_price = "$" + deal_price
-        if extract_codes:
-            codes = sorted(pred.index.get_level_values("instrument").unique())
-        else:
-            codes = "all"  # TODO: We must ensure that 'all.txt' includes all the stocks
-
-        dates = sorted(pred.index.get_level_values("datetime").unique())
-        dates = np.append(dates, get_date_range(dates[-1], shift=shift))
-
-        exchange = Exchange(
-            trade_dates=dates,
-            codes=codes,
-            deal_price=deal_price,
-            subscribe_fields=subscribe_fields,
-            limit_threshold=limit_threshold,
-            open_cost=open_cost,
-            close_cost=close_cost,
-            min_cost=min_cost,
-            trade_unit=trade_unit,
-        )
-    return exchange
-
-
 # This is the API for compatibility for legacy code
 def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs):
    """This function will help you set a reasonable Exchange and provide default value for strategy
@@ -249,30 +110,22 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
        will we pass the codes extracted from the pred to the exchange.

        .. note:: This will be faster with offline qlib.
+
+    - **executor related arguments**
+
+    executor : BaseExecutor()
+        executor used in backtest.
+    verbose : bool
+        whether to print log.
+
    """
-    # check strategy:
-    spec = inspect.getfullargspec(get_strategy)
-    str_args = {k: v for k, v in kwargs.items() if k in spec.args}
-    strategy = get_strategy(**str_args)
-
-    # init exchange:
-    spec = inspect.getfullargspec(get_exchange)
-    ex_args = {k: v for k, v in kwargs.items() if k in spec.args}
-    trade_exchange = get_exchange(pred, **ex_args)
-
-    # run backtest
-    report_df, positions = backtest_func(
-        pred=pred,
-        strategy=strategy,
-        trade_exchange=trade_exchange,
-        shift=shift,
-        verbose=verbose,
-        account=account,
-        benchmark=benchmark,
+    warnings.warn(
+        "this function is deprecated, please use backtest function in qlib.contrib.backtest", DeprecationWarning
    )
-    # for  compatibility of the old API. return the dict positions
-    positions = {k: p.position for k, p in positions.items()}
-    return report_df, positions
+    report_dict = backtest_func(
+        pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs
+    )
+    return report_dict.get("report_df"), report_dict.get("positions")


 def long_short_backtest(
@@ -340,7 +193,7 @@ def long_short_backtest(

    _pred_dates = pred.index.get_level_values(level="datetime")
    predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max())
-    trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], shift=shift))
+    trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))

    long_returns = {}
    short_returns = {}
--- a/qlib/contrib/evaluate_portfolio.py
+++ b/qlib/contrib/evaluate_portfolio.py
@@ -61,7 +61,7 @@ def get_position_value(evaluate_date, position):
    # load close price for position
    # position should also consider cash
    instruments = list(position.keys())
-    instruments = list(set(instruments) - set(["cash"]))  # filter 'cash'
+    instruments = list(set(instruments) - {"cash"})  # filter 'cash'
    fields = ["$close"]
    close_data_df = D.features(
        instruments,
@@ -80,7 +80,7 @@ def get_position_list_value(positions):
    instruments = set()
    for day, position in positions.items():
        instruments.update(position.keys())
-    instruments = list(set(instruments) - set(["cash"]))  # filter 'cash'
+    instruments = list(set(instruments) - {"cash"})  # filter 'cash'
    instruments.sort()
    day_list = list(positions.keys())
    day_list.sort()
--- a/qlib/contrib/model/init.py
+++ b/qlib/contrib/model/init.py
@@ -0,0 +1,39 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+# Every import below is wrapped in try/except so that a missing optional library
+# does not break the import of this package: on ModuleNotFoundError the affected
+# class names are bound to None (or the tuple is left empty) and a hint is printed.
+try:
+    from .catboost_model import CatBoostModel
+except ModuleNotFoundError:
+    CatBoostModel = None
+    print("Please install necessary libs for CatBoostModel.")
+# DEnsembleModel and LGBModel are imported together; if either import fails, both are set to None.
+try:
+    from .double_ensemble import DEnsembleModel
+    from .gbdt import LGBModel
+except ModuleNotFoundError:
+    DEnsembleModel, LGBModel = None, None
+    print("Please install necessary libs for DEnsembleModel and LGBModel, such as lightgbm.")
+try:
+    from .xgboost import XGBModel
+except ModuleNotFoundError:
+    XGBModel = None
+    print("Please install necessary libs for XGBModel, such as xgboost.")
+try:
+    from .linear import LinearModel
+except ModuleNotFoundError:
+    LinearModel = None
+    print("Please install necessary libs for LinearModel, such as scipy and sklearn.")
+# import pytorch models
+# (all of them share one try block: if any single import fails, pytorch_classes stays empty)
+try:
+    from .pytorch_alstm import ALSTM
+    from .pytorch_gats import GATs
+    from .pytorch_gru import GRU
+    from .pytorch_lstm import LSTM
+    from .pytorch_nn import DNNModelPytorch
+    from .pytorch_tabnet import TabnetModel
+    from .pytorch_sfm import SFM_Model
+
+    pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model)
+except ModuleNotFoundError:
+    pytorch_classes = ()
+    print("Please install necessary libs for PyTorch models.")
+
+# Tuple of all model classes exposed by this package.
+# NOTE: entries are None for the non-pytorch models whose import failed above.
+all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes
--- a/qlib/contrib/model/catboost_model.py
+++ b/qlib/contrib/model/catboost_model.py
@@ -3,15 +3,17 @@

 import numpy as np
 import pandas as pd
+from typing import Text, Union
 from catboost import Pool, CatBoost
 from catboost.utils import get_gpu_device_count

 from ...model.base import Model
 from ...data.dataset import DatasetH
 from ...data.dataset.handler import DataHandlerLP
+from ...model.interpret.base import FeatureInt


-class CatBoostModel(Model):
+class CatBoostModel(Model, FeatureInt):
    """CatBoost Model"""

    def __init__(self, loss="RMSE", **kwargs):
@@ -62,12 +64,24 @@ class CatBoostModel(Model):
        evals_result["train"] = list(evals_result["learn"].values())[0]
        evals_result["valid"] = list(evals_result["validation"].values())[0]

-    def predict(self, dataset):
+    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
        if self.model is None:
            raise ValueError("model is not fitted yet!")
-        x_test = dataset.prepare("test", col_set="feature")
+        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
        return pd.Series(self.model.predict(x_test.values), index=x_test.index)

+    def get_feature_importance(self, *args, **kwargs) -> pd.Series:
+        """get feature importance
+
+        Notes
+        -----
+            parameters references:
+            https://catboost.ai/docs/concepts/python-reference_catboost_get_feature_importance.html#python-reference_catboost_get_feature_importance
+        """
+        return pd.Series(
+            data=self.model.get_feature_importance(*args, **kwargs), index=self.model.feature_names_
+        ).sort_values(ascending=False)
+

 if __name__ == "__main__":
    cat = CatBoostModel()
--- a/qlib/contrib/model/double_ensemble.py
+++ b/qlib/contrib/model/double_ensemble.py
@@ -0,0 +1,265 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import lightgbm as lgb
+import numpy as np
+import pandas as pd
+from typing import Text, Union
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+from ...model.interpret.base import FeatureInt
+from ...log import get_module_logger
+
+
+class DEnsembleModel(Model, FeatureInt):
+    """Double Ensemble Model
+
+    An ensemble of LightGBM sub-models that are trained one after another.  After each sub-model
+    (except the last one) the training samples can be re-weighted (``enable_sr``) and the feature
+    set can be re-sampled (``enable_fs``), both based on the loss of the ensemble built so far.
+
+    Parameters
+    ----------
+    base_model : str
+        "gbm" or "mlp"; only "gbm" (LightGBM) is implemented.
+    loss : str
+        objective passed to LightGBM; ``get_loss`` only implements "mse".
+    num_models : int
+        the number of sub-models.
+    enable_sr : bool
+        whether to run sample re-weighting between sub-models.
+    enable_fs : bool
+        whether to run feature selection between sub-models.
+    alpha1, alpha2 : float
+        weights of the two terms of the h-value used in sample re-weighting.
+    bins_sr : int
+        number of bins used in sample re-weighting.
+    bins_fs : int
+        number of bins used in feature selection.
+    decay : float or None
+        decay factor used in sample re-weighting; ``None`` is treated as 1.0 (no decay).
+    sample_ratios : list of float
+        one ratio per feature bin (length ``bins_fs``): the fraction of features sampled from it.
+    sub_weights : list of float
+        one weight per sub-model (length ``num_models``), used when predictions are combined.
+    epochs : int
+        ``num_boost_round`` of every sub-model.
+    **kwargs
+        additional LightGBM parameters.
+
+    Raises
+    ------
+    ValueError
+        if the length of ``sample_ratios`` / ``sub_weights`` does not match ``bins_fs`` / ``num_models``.
+    """
+
+    def __init__(
+        self,
+        base_model="gbm",
+        loss="mse",
+        num_models=6,
+        enable_sr=True,
+        enable_fs=True,
+        alpha1=1.0,
+        alpha2=1.0,
+        bins_sr=10,
+        bins_fs=5,
+        decay=None,
+        sample_ratios=None,
+        sub_weights=None,
+        epochs=100,
+        **kwargs
+    ):
+        self.base_model = base_model  # "gbm" or "mlp", specifically, we use lgbm for "gbm"
+        self.num_models = num_models  # the number of sub-models
+        self.enable_sr = enable_sr
+        self.enable_fs = enable_fs
+        self.alpha1 = alpha1
+        self.alpha2 = alpha2
+        self.bins_sr = bins_sr
+        self.bins_fs = bins_fs
+        self.decay = decay
+        if sample_ratios is None:  # the default values for sample_ratios
+            sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4]
+        if sub_weights is None:  # the default values for sub_weights
+            sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2]
+        if len(sample_ratios) != bins_fs:
+            raise ValueError("The length of sample_ratios should be equal to bins_fs.")
+        self.sample_ratios = sample_ratios
+        if len(sub_weights) != num_models:
+            raise ValueError("The length of sub_weights should be equal to num_models.")
+        self.sub_weights = sub_weights
+        self.epochs = epochs
+        self.logger = get_module_logger("DEnsembleModel")
+        self.logger.info("Double Ensemble Model...")
+        self.ensemble = []  # the current ensemble model, a list contains all the sub-models
+        self.sub_features = []  # the features for each sub model in the form of pandas.Index
+        self.params = {"objective": loss}
+        self.params.update(kwargs)
+        self.loss = loss
+
+    def fit(self, dataset: DatasetH):
+        """Train ``num_models`` sub-models on the "train" segment, validated on the "valid" segment.
+
+        Between two consecutive sub-models the sample weights and the feature set are updated
+        (if enabled) from the loss curve of the last sub-model and the loss of the current ensemble.
+        """
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        x_train, y_train = df_train["feature"], df_train["label"]
+        # start from a clean state: without this a second call of fit() would stack new sub-models
+        # on top of the old ones, while sub_weights only has num_models entries
+        self.ensemble = []
+        self.sub_features = []
+        # initialize the sample weights
+        N = x_train.shape[0]
+        weights = pd.Series(np.ones(N, dtype=float))
+        # initialize the features
+        features = x_train.columns
+        pred_sub = pd.DataFrame(np.zeros((N, self.num_models), dtype=float), index=x_train.index)
+        # train sub-models
+        for k in range(self.num_models):
+            self.sub_features.append(features)
+            self.logger.info("Training sub-model: ({}/{})".format(k + 1, self.num_models))
+            model_k = self.train_submodel(df_train, df_valid, weights, features)
+            self.ensemble.append(model_k)
+            # no further sample re-weight and feature selection needed for the last sub-model
+            if k + 1 == self.num_models:
+                break
+
+            self.logger.info("Retrieving loss curve and loss values...")
+            loss_curve = self.retrieve_loss_curve(model_k, df_train, features)
+            pred_k = self.predict_sub(model_k, df_train, features)
+            pred_sub.iloc[:, k] = pred_k
+            # the current ensemble prediction is the plain average of the sub-models trained so far
+            pred_ensemble = pred_sub.iloc[:, : k + 1].mean(axis=1)
+            loss_values = pd.Series(self.get_loss(y_train.values.squeeze(), pred_ensemble.values))
+
+            if self.enable_sr:
+                self.logger.info("Sample re-weighting...")
+                weights = self.sample_reweight(loss_curve, loss_values, k + 1)
+
+            if self.enable_fs:
+                self.logger.info("Feature selection...")
+                features = self.feature_selection(df_train, loss_values)
+
+    def train_submodel(self, df_train, df_valid, weights, features):
+        """Train one LightGBM sub-model on ``features`` with the given sample ``weights``."""
+        dtrain, dvalid = self._prepare_data_gbm(df_train, df_valid, weights, features)
+        evals_result = dict()
+        model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=self.epochs,
+            valid_sets=[dtrain, dvalid],
+            valid_names=["train", "valid"],
+            verbose_eval=20,
+            evals_result=evals_result,
+        )
+        # flatten the recorded metric curves; the result is local and only the model is returned
+        evals_result["train"] = list(evals_result["train"].values())[0]
+        evals_result["valid"] = list(evals_result["valid"].values())[0]
+        return model
+
+    def _prepare_data_gbm(self, df_train, df_valid, weights, features):
+        """Build the LightGBM train/valid datasets restricted to ``features``.
+
+        Raises ValueError if the label has more than one column.
+        """
+        x_train, y_train = df_train["feature"].loc[:, features], df_train["label"]
+        x_valid, y_valid = df_valid["feature"].loc[:, features], df_valid["label"]
+
+        # Lightgbm need 1D array as its label
+        if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
+            y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values)
+        else:
+            raise ValueError("LightGBM doesn't support multi-label training")
+
+        # only the training set carries sample weights
+        dtrain = lgb.Dataset(x_train, label=y_train, weight=weights)
+        dvalid = lgb.Dataset(x_valid, label=y_valid)
+        return dtrain, dvalid
+
+    def sample_reweight(self, loss_curve, loss_values, k_th):
+        """
+        the SR module of Double Ensemble
+        :param loss_curve: the shape is NxT
+        the loss curve for the previous sub-model, where the element (i, t) is the error on the i-th sample
+        after the t-th iteration in the training of the previous sub-model.
+        :param loss_values: the shape is N
+        the loss of the current ensemble on the i-th sample.
+        :param k_th: the index of the current sub-model, starting from 1
+        :return: weights
+        the weights for all the samples.
+        """
+        # `decay` defaults to None; treat that as "no decay" instead of failing on `None ** k_th`
+        decay = 1.0 if self.decay is None else self.decay
+
+        # normalize loss_curve and loss_values with ranking
+        loss_curve_norm = loss_curve.rank(axis=0, pct=True)
+        loss_values_norm = (-loss_values).rank(pct=True)
+
+        # calculate l_start and l_end from loss_curve
+        # (mean normalized loss over the first / last 10% of the iterations, at least one iteration)
+        N, T = loss_curve.shape
+        part = np.maximum(int(T * 0.1), 1)
+        l_start = loss_curve_norm.iloc[:, :part].mean(axis=1)
+        l_end = loss_curve_norm.iloc[:, -part:].mean(axis=1)
+
+        # calculate h-value for each sample
+        h1 = loss_values_norm
+        h2 = (l_end / l_start).rank(pct=True)
+        h = pd.DataFrame({"h_value": self.alpha1 * h1 + self.alpha2 * h2})
+
+        # calculate weights
+        h["bins"] = pd.cut(h["h_value"], self.bins_sr)
+        h_avg = h.groupby("bins")["h_value"].mean()
+        weights = pd.Series(np.zeros(N, dtype=float))
+        # iterate over (bin, average) pairs so that no integer is used as a key on the
+        # interval-labelled index of h_avg
+        for b, avg in h_avg.items():
+            weights[h["bins"] == b] = 1.0 / (decay ** k_th * avg + 0.1)
+        return weights
+
+    def feature_selection(self, df_train, loss_values):
+        """
+        the FS module of Double Ensemble
+        :param df_train: the shape is NxF
+        :param loss_values: the shape is N
+        the loss of the current ensemble on the i-th sample.
+        :return: res_feat: in the form of pandas.Index
+
+        """
+        x_train, y_train = df_train["feature"], df_train["label"]
+        features = x_train.columns
+        N, F = x_train.shape
+        g = pd.DataFrame({"g_value": np.zeros(F, dtype=float)})
+        M = len(self.ensemble)
+
+        # shuffle specific columns and calculate g-value for each feature
+        x_train_tmp = x_train.copy()
+        for i_f, feat in enumerate(features):
+            x_train_tmp.loc[:, feat] = np.random.permutation(x_train_tmp.loc[:, feat].values)
+            pred = pd.Series(np.zeros(N), index=x_train_tmp.index)
+            for i_s, submodel in enumerate(self.ensemble):
+                pred += (
+                    pd.Series(
+                        submodel.predict(x_train_tmp.loc[:, self.sub_features[i_s]].values), index=x_train_tmp.index
+                    )
+                    / M
+                )
+            loss_feat = self.get_loss(y_train.values.squeeze(), pred.values)
+            # g-value: mean increase of the loss after shuffling the feature, scaled by its std
+            g.loc[i_f, "g_value"] = np.mean(loss_feat - loss_values) / (np.std(loss_feat - loss_values) + 1e-7)
+            # restore the shuffled column before moving on to the next feature
+            x_train_tmp.loc[:, feat] = x_train.loc[:, feat].copy()
+
+        # one column in train features is all-nan # if g['g_value'].isna().any()
+        # (assign the result back instead of a chained in-place replace on a column selection)
+        g["g_value"] = g["g_value"].fillna(0)
+
+        # divide features into bins_fs bins
+        g["bins"] = pd.cut(g["g_value"], self.bins_fs)
+
+        # randomly sample features from bins to construct the new features
+        res_feat = []
+        sorted_bins = sorted(g["bins"].unique(), reverse=True)
+        for i_b, b in enumerate(sorted_bins):
+            b_feat = features[g["bins"] == b]
+            num_feat = int(np.ceil(self.sample_ratios[i_b] * len(b_feat)))
+            res_feat = res_feat + np.random.choice(b_feat, size=num_feat, replace=False).tolist()
+        # every feature belongs to exactly one bin and is drawn without replacement, so res_feat has
+        # no duplicates; keeping the list order makes the result depend on the numpy RNG only
+        return pd.Index(res_feat)
+
+    def get_loss(self, label, pred):
+        """Return the element-wise loss between ``label`` and ``pred`` (only "mse" is implemented)."""
+        if self.loss == "mse":
+            return (label - pred) ** 2
+        else:
+            raise ValueError("not implemented yet")
+
+    def retrieve_loss_curve(self, model, df_train, features):
+        """Return an N x num_trees DataFrame: the per-sample loss after each boosting iteration."""
+        if self.base_model == "gbm":
+            num_trees = model.num_trees()
+            x_train, y_train = df_train["feature"].loc[:, features], df_train["label"]
+            # Lightgbm need 1D array as its label
+            if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
+                y_train = np.squeeze(y_train.values)
+            else:
+                raise ValueError("LightGBM doesn't support multi-label training")
+
+            N = x_train.shape[0]
+            loss_curve = pd.DataFrame(np.zeros((N, num_trees)))
+            pred_tree = np.zeros(N, dtype=float)
+            for i_tree in range(num_trees):
+                # accumulate the contribution of one iteration at a time
+                pred_tree += model.predict(x_train.values, start_iteration=i_tree, num_iteration=1)
+                loss_curve.iloc[:, i_tree] = self.get_loss(y_train, pred_tree)
+        else:
+            raise ValueError("not implemented yet")
+        return loss_curve
+
+    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
+        """Return the ``sub_weights``-weighted sum of the sub-model predictions on ``segment``.
+
+        Raises ValueError if the model has not been fitted.
+        """
+        # `self.ensemble` is initialised to an empty list, so test for emptiness rather than None
+        if not self.ensemble:
+            raise ValueError("model is not fitted yet!")
+        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
+        pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index)
+        for i_sub, submodel in enumerate(self.ensemble):
+            feat_sub = self.sub_features[i_sub]
+            pred += (
+                pd.Series(submodel.predict(x_test.loc[:, feat_sub].values), index=x_test.index)
+                * self.sub_weights[i_sub]
+            )
+        return pred
+
+    def predict_sub(self, submodel, df_data, features):
+        """Return the prediction of a single sub-model on ``df_data`` restricted to ``features``."""
+        x_data = df_data["feature"].loc[:, features]
+        pred_sub = pd.Series(submodel.predict(x_data.values), index=x_data.index)
+        return pred_sub
+
+    def get_feature_importance(self, *args, **kwargs) -> pd.Series:
+        """get feature importance
+
+        The importance of every sub-model is scaled by its ``sub_weights`` entry and summed per
+        feature; the result is sorted in descending order.
+
+        Notes
+        -----
+            parameters reference:
+            https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance
+        """
+        res = []
+        for _model, _weight in zip(self.ensemble, self.sub_weights):
+            res.append(pd.Series(_model.feature_importance(*args, **kwargs), index=_model.feature_name()) * _weight)
+        return pd.concat(res, axis=1, sort=False).sum(axis=1).sort_values(ascending=False)
--- a/Show More
+++ b/Show More