diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
index ec8ea5d69..488419d52 100644
--- a/.github/release-drafter.yml
+++ b/.github/release-drafter.yml
@@ -14,6 +14,9 @@ categories:
     label: 
       - 'doc'
       - 'documentation'
+  - title: '🧹 Maintenance'
+    label: 
+      - 'maintenance'
 change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
 change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
 version-resolver:
@@ -30,4 +33,4 @@ version-resolver:
 template: |
   ## Changes
 
-  $CHANGES
\ No newline at end of file
+  $CHANGES
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index db14fbf3b..e95a9e88c 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -38,7 +38,7 @@ jobs:
         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
       run: |
         twine upload dist/*
-        
+
   deploy_with_manylinux:
     runs-on: ubuntu-latest
     steps:
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index b07bdf1e7..6ce457dfd 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -18,7 +18,8 @@ jobs:
         stale-issue-label: 'stale'
         stale-pr-label: 'stale'
         days-before-stale: 90
+        days-before-pr-stale: 365
         days-before-close: 5
         operations-per-run: 100
         exempt-issue-labels: 'bug,enhancement'
-        remove-stale-when-updated: true
\ No newline at end of file
+        remove-stale-when-updated: true
diff --git a/.github/workflows/test_qlib_from_pip.yml b/.github/workflows/test_qlib_from_pip.yml
index e6202e57e..f5db06ccb 100644
--- a/.github/workflows/test_qlib_from_pip.yml
+++ b/.github/workflows/test_qlib_from_pip.yml
@@ -8,6 +8,7 @@ on:
 
 jobs:
   build:
+    if: ${{ false }}  # FIXME: temporarily disabled because we are rushing a feature
     timeout-minutes: 120
 
     runs-on: ${{ matrix.os }}
@@ -19,10 +20,20 @@ jobs:
 
     steps:
     - name: Test qlib from pip
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
+
+    # The Python 3.7 installed in CI for MacOS is version 3.7.17, and this version causes a "_bz not found error".
+    # So we pin Python 3.7 for MacOS to a more specific version number.
+    # refs: https://github.com/actions/setup-python/issues/682
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
 
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
 
@@ -50,7 +61,9 @@ jobs:
 
     - name: Downloads dependencies data
       run: |
-        python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        cd ..
+        python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
+        cd qlib
 
     - name: Test workflow by config
       run: |
diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml
index 68dfe5b3f..7271287dc 100644
--- a/.github/workflows/test_qlib_from_source.yml
+++ b/.github/workflows/test_qlib_from_source.yml
@@ -20,18 +20,28 @@ jobs:
 
     steps:
     - name: Test qlib from source
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
+
+    # The Python 3.7 installed in CI for MacOS is version 3.7.17, and this version causes a "_bz not found error".
+    # So we pin Python 3.7 for MacOS to a more specific version number.
+    # refs: https://github.com/actions/setup-python/issues/682
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
 
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Update pip to the latest version
       # pip release version 23.1 on Apr.15 2023, CI failed to run, Please refer to #1495 ofr detailed logs.
-      # The pip version has been temporarily fixed to 23.0.1
+      # The pip version has been temporarily fixed to 23.0
       run: |
-        python -m pip install pip==23.0.1
+        python -m pip install pip==23.0
 
     - name: Installing pytorch for macos
       if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
@@ -54,7 +64,10 @@ jobs:
         python -m pip install -e .[dev]
 
     - name: Lint with Black
+      # Python 3.7 will use an older version of black, so we run the black check only with higher Python versions.
+      if: (matrix.python-version != '3.7')
       run: |
+        pip install -U black  # follow the latest version of black, previous Qlib dependency will downgrade black
         black . -l 120 --check --diff
 
     - name: Make html with sphinx
@@ -129,8 +142,7 @@ jobs:
     - name: Test data downloads
       run: |
         python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
-        azcopy copy https://qlibpublic.blob.core.windows.net/data/rl /tmp/qlibpublic/data --recursive
-        mv /tmp/qlibpublic/data tests/.data
+        python scripts/get_data.py download_data --file_name rl_data.zip --target_dir tests/.data/rl
 
     - name: Install Lightgbm for MacOS
       if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
diff --git a/.github/workflows/test_qlib_from_source_slow.yml b/.github/workflows/test_qlib_from_source_slow.yml
index f8e43fa17..1dfcc0179 100644
--- a/.github/workflows/test_qlib_from_source_slow.yml
+++ b/.github/workflows/test_qlib_from_source_slow.yml
@@ -20,18 +20,28 @@ jobs:
 
     steps:
     - name: Test qlib from source slow
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
+
+    # The Python 3.7 installed in CI for MacOS is version 3.7.17, and this version causes a "_bz not found error".
+    # So we pin Python 3.7 for MacOS to a more specific version number.
+    # refs: https://github.com/actions/setup-python/issues/682
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
 
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Set up Python tools
       # pip release version 23.1 on Apr.15 2023, CI failed to run, Please refer to #1495 ofr detailed logs.
-      # The pip version has been temporarily fixed to 23.0.1
+      # The pip version has been temporarily fixed to 23.0
       run: |
-        python -m pip install pip==23.0.1
+        python -m pip install pip==23.0
         pip install --upgrade cython numpy
         pip install -e .[dev]
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ea57aeb0e..15f00414c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: 22.6.0
+    rev: 23.7.0
     hooks:
     -   id: black
         args: ["qlib", "-l 120"]
@@ -9,4 +9,4 @@ repos:
     rev: 4.0.1
     hooks:
         - id: flake8
-          args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
\ No newline at end of file
+          args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
diff --git a/README.md b/README.md
index cedfdc348..539700a91 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@
 Recent released features
 | Feature | Status |
 | --                      | ------    |
+| KRNN and Sandwich models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1414/) on May 26, 2023 |
 | Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
 | RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316),[#1299](https://github.com/microsoft/qlib/pull/1299),[#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076)|
 | HIST and IGMTF models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1040) on Apr 10, 2022 |
@@ -90,6 +91,7 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
               </ul>
             </li>
           <li type="circle"><a href="#adapting-to-market-dynamics">Adapting to Market Dynamics</a></li>
+          <li type="circle"><a href="#reinforcement-learning-modeling-continuous-decisions">Reinforcement Learning: modeling continuous decisions</a></li>
           </ul>
         </li>
       </td>
@@ -353,6 +355,8 @@ Here is a list of models built on `Qlib`.
 - [ADD based on pytorch (Hongshun Tang, et al.2020)](examples/benchmarks/ADD/)
 - [IGMTF based on pytorch (Wentao Xu, et al.2021)](examples/benchmarks/IGMTF/)
 - [HIST based on pytorch (Wentao Xu, et al.2021)](examples/benchmarks/HIST/)
+- [KRNN based on pytorch](examples/benchmarks/KRNN/)
+- [Sandwich based on pytorch](examples/benchmarks/Sandwich/)
 
 Your PR of new Quant models is highly welcomed.
 
@@ -389,6 +393,17 @@ Here is a list of solutions built on `Qlib`.
 - [Rolling Retraining](examples/benchmarks_dynamic/baseline/)
 - [DDG-DA on pytorch (Wendi, et al. AAAI 2022)](examples/benchmarks_dynamic/DDG-DA/)
 
+##  Reinforcement Learning: modeling continuous decisions
+Qlib now supports reinforcement learning, a feature designed to model continuous investment decisions. This functionality assists investors in optimizing their trading strategies by learning from interactions with the environment to maximize some notion of cumulative reward.
+
+Here is a list of solutions built on `Qlib` categorized by scenarios.
+
+### [RL for order execution](examples/rl_order_execution)
+[Here](https://qlib.readthedocs.io/en/latest/component/rl/overall.html#order-execution) is the introduction of this scenario.  All the methods below are compared [here](examples/rl_order_execution).
+- [TWAP](examples/rl_order_execution/exp_configs/backtest_twap.yml)
+- [PPO: "An End-to-End Optimal Trade Execution Framework based on Proximal Policy Optimization", IJCAI 2020](examples/rl_order_execution/exp_configs/backtest_ppo.yml)
+- [OPDS: "Universal Trading for Order Execution with Oracle Policy Distillation", AAAI 2021](examples/rl_order_execution/exp_configs/backtest_opds.yml)
+
 # Quant Dataset Zoo
 Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`:
 
diff --git a/docs/component/data.rst b/docs/component/data.rst
index 60e8d4fa1..5a2d458f6 100644
--- a/docs/component/data.rst
+++ b/docs/component/data.rst
@@ -119,7 +119,7 @@ Here are some example:
 for daily data:
   .. code-block:: bash
 
-    python scripts/get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data
+    python scripts/get_data.py download_data --file_name csv_data_cn.zip --target_dir ~/.qlib/csv_data/cn_data
 
 for 1min data:
   .. code-block:: bash
diff --git a/docs/component/rl/guidance.rst b/docs/component/rl/guidance.rst
new file mode 100644
index 000000000..7f917d559
--- /dev/null
+++ b/docs/component/rl/guidance.rst
@@ -0,0 +1,32 @@
+
+========
+Guidance
+========
+.. currentmodule:: qlib
+
+QlibRL can help users quickly get started and conveniently implement quantitative strategies based on reinforcement learning (RL) algorithms. For different user groups, we recommend the following guidance to use QlibRL.
+
+Beginners to Reinforcement Learning Algorithms
+==============================================
+Whether you are a quantitative researcher who wants to understand what RL can do in trading or a learner who wants to get started with RL algorithms in trading scenarios, if you have limited knowledge of RL and want to skip over the various detailed settings in order to get started quickly with RL algorithms, we recommend the following sequence to learn QlibRL:
+ - Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
+ - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
+ - Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
+ - If you want to further explore QlibRL and make some customizations, you need to first understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_ and rewrite specific components according to your needs.
+
+Reinforcement Learning Algorithm Researcher
+==============================================
+If you are already familiar with existing RL algorithms and dedicated to researching RL algorithms but lack domain knowledge in the financial field, and you want to validate the effectiveness of your algorithms in financial trading scenarios, we recommend the following steps to get started with QlibRL:
+ - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
+ - Choose an RL application scenario (currently, QlibRL has implemented two scenario examples: order execution and algorithmic trading). Run the example in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to get it working.
+ - Modify the `policy <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/policy.py>`_ part to incorporate your own RL algorithm.
+
+Quantitative Researcher
+=======================
+If you have a certain level of financial domain knowledge and coding skills, and you want to explore the application of RL algorithms in the investment field, we recommend the following steps to explore QlibRL:
+ - Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
+ - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
+ - Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
+ - Understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_.
+ - Choose a suitable RL algorithm based on the characteristics of the problem you want to solve (currently, QlibRL supports PPO and DQN algorithms based on tianshou).
+ - Design the MDP (Markov Decision Process) process based on market trading rules and the problem you want to solve. Refer to the example in order execution and make corresponding modifications to the following modules: `State <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L70>`_, `Metrics <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L18>`_, `ActionInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L199>`_, `StateInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L68>`_, `Reward <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/reward.py>`_, `Observation <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L44>`_, `Simulator <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/simulator_simple.py>`_.
\ No newline at end of file
diff --git a/docs/component/rl/overall.rst b/docs/component/rl/overall.rst
index 4f59dd17a..f586a07e2 100644
--- a/docs/component/rl/overall.rst
+++ b/docs/component/rl/overall.rst
@@ -4,7 +4,7 @@ Reinforcement Learning in Quantitative Trading
 
 Reinforcement Learning
 ======================
-Different from supervised learning tasks such as classification tasks and regression tasks. Another important paradigm in machine learning is Reinforcement Learning, 
+Unlike supervised learning tasks such as classification and regression, another important paradigm in machine learning is Reinforcement Learning (RL),
 which attempts to optimize an accumulative numerical reward signal by directly interacting with the environment under a few assumptions such as Markov Decision Process(MDP).
 
 As demonstrated in the following figure, an RL system consists of four elements, 1)the agent 2) the environment the agent interacts with 3) the policy that the agent follows to take actions on the environment and 4)the reward signal from the environment to the agent. 
@@ -25,26 +25,46 @@ The Qlib Reinforcement Learning toolkit (QlibRL) is an RL platform for quantitat
 
 Potential Application Scenarios in Quantitative Trading
 =======================================================
-RL methods have already achieved outstanding achievement in many applications, such as game playing, resource allocating, recommendation, marketing and advertising, etc.
-Investment is always a continuous process, taking the stock market as an example, investors need to control their positions and stock holdings by one or more buying and selling behaviors, to maximize the investment returns.
-Besides, each buy and sell decision is made by investors after fully considering the overall market information and stock information. 
-From the view of an investor, the process could be described as a continuous decision-making process generated according to interaction with the market, such problems could be solved by the RL algorithms. 
-Following are some scenarios where RL can potentially be used in quantitative investment.
-
-Portfolio Construction
-----------------------
-Portfolio construction is a process of selecting securities optimally by taking a minimum risk to achieve maximum returns. With an RL-based solution, an agent allocates stocks at every time step by obtaining information for each stock and the market. The key is to develop of policy for building a portfolio and make the policy able to pick the optimal portfolio. 
+RL methods have demonstrated remarkable achievements in various applications, including game playing, resource allocation, recommendation systems, marketing, and advertising.
+In the context of investment, which involves continuous decision-making, let's consider the example of the stock market. Investors strive to optimize their investment returns by effectively managing their positions and stock holdings through various buying and selling behaviors.
+Furthermore, investors carefully evaluate market conditions and stock-specific information before making each buying or selling decision. From an investor's perspective, this process can be viewed as a continuous decision-making process driven by interactions with the market. RL algorithms offer a promising approach to tackle such challenges.
+Here are several scenarios where RL holds potential for application in quantitative investment.
 
 Order Execution
 ---------------
-As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Essentially, the goal of order execution is twofold: it not only requires to fulfill the whole order but also targets a more economical execution with maximizing profit gain (or minimizing capital loss). The order execution with only one order of liquidation or acquirement is called single-asset order execution.
+The order execution task is to execute orders efficiently while considering multiple factors, including optimal prices, minimizing trading costs, reducing market impact, maximizing order fulfillment rates, and achieving execution within a specified time frame. RL can be applied to such tasks by incorporating these objectives into the reward function and action selection process. Specifically, the RL agent interacts with the market environment, observes the state from market information, and makes decisions on next step execution. The RL algorithm learns an optimal execution strategy through trial and error, aiming to maximize the expected cumulative reward, which incorporates the desired objectives.
 
-Considering stock investment always aim to pursue long-term maximized profits, it usually manifests as a sequential process of continuously adjusting the asset portfolios, execution for multiple orders, including order of liquidation and acquirement, brings more constraints and makes the sequence of execution for different orders should be considered, e.g. before executing an order to buy some stocks, we have to sell at least one stock. The order execution with multiple assets is called multi-asset order execution. 
+ - General Setting
+    - Environment: The environment represents the financial market where order execution takes place. It encompasses variables such as the order book dynamics, liquidity, price movements, and market conditions.
 
-According to the order execution’s trait of sequential decision-making, an RL-based solution could be applied to solve the order execution. With an RL-based solution, an agent optimizes execution strategy by interacting with the market environment. 
+    - State: The state refers to the information available to the RL agent at a given time step. It typically includes features such as the current order book state (bid-ask spread, order depth), historical price data, historical trading volume, market volatility, and any other relevant information that can aid in decision-making.
 
-With QlibRL, the RL algorithm in the above scenarios can be easily implemented.
+    - Action: The action is the decision made by the RL agent based on the observed state. In order execution, actions can include selecting the order size, price, and timing of execution.
 
-Nested Portfolio Construction and Order Executor
-------------------------------------------------
-QlibRL makes it possible to jointly optimize different levels of strategies/models/agents. Take `Nested Decision Execution Framework <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution>`_ as an example, the optimization of order execution strategy and portfolio management strategies can interact with each other to maximize returns.
+    - Reward: The reward is a scalar signal that indicates the performance of the RL agent's action in the environment. The reward function is designed to encourage actions that lead to efficient and cost-effective order execution. It typically considers multiple objectives, such as maximizing price advantages, minimizing trading costs (including transaction fees and slippage), reducing market impact (the effect of the order on the market price) and maximizing order fulfillment rates.
+
+ - Scenarios
+    - Single-asset order execution: Single-asset order execution focuses on the task of executing a single order for a specific asset, such as a stock or a cryptocurrency. The primary objective is to execute the order efficiently while considering factors such as maximizing price advantages, minimizing trading costs, reducing market impact, and achieving a high fulfillment rate. The RL agent interacts with the market environment and makes decisions on order size, price, and timing of execution for that particular asset. The goal is to learn an optimal execution strategy for the single asset, maximizing the expected cumulative reward while considering the specific dynamics and characteristics of that asset.
+
+    - Multi-asset order execution: Multi-asset order execution expands the order execution task to involve multiple assets or securities. It typically involves executing a portfolio of orders across different assets simultaneously or sequentially. Unlike single-asset order execution, the focus is not only on the execution of individual orders but also on managing the interactions and dependencies between different assets within the portfolio. The RL agent needs to make decisions on the order sizes, prices, and timings for each asset in the portfolio, considering their interdependencies, cash constraints, market conditions, and transaction costs. The goal is to learn an optimal execution strategy that balances the execution efficiency for each asset while considering the overall performance and objectives of the portfolio as a whole.
+   
+The choice of settings and RL algorithm depends on the specific requirements of the task, available data, and desired performance objectives. 
+
+Portfolio Construction
+----------------------
+Portfolio construction is a process of selecting and allocating assets in an investment portfolio. RL provides a framework to optimize portfolio management decisions by learning from interactions with the market environment and maximizing long-term returns while considering risk management.
+ - General Setting
+    - State: The state represents the current information about the market and the portfolio. It typically includes historical prices and volumes, technical indicators, and other relevant data.
+
+    - Action: The action corresponds to the decision of allocating capital to different assets in the portfolio. It determines the weights or proportions of investments in each asset.
+
+    - Reward: The reward is a metric that evaluates the performance of the portfolio. It can be defined in various ways, such as total return, risk-adjusted return, or other objectives like maximizing Sharpe ratio or minimizing drawdown.
+
+ - Scenarios
+    - Stock market: RL can be used to construct portfolios of stocks, where the agent learns to allocate capital among different stocks.
+
+    - Cryptocurrency market: RL can be applied to construct portfolios of cryptocurrencies, where the agent learns to make allocation decisions.
+
+    - Foreign exchange (Forex) market: RL can be used to construct portfolios of currency pairs, where the agent learns to allocate capital across different currencies based on exchange rate data, economic indicators, and other factors.
+
+Similarly, the choice of basic setting and algorithm depends on the specific requirements of the problem and the characteristics of the market.
\ No newline at end of file
diff --git a/docs/component/rl/toctree.rst b/docs/component/rl/toctree.rst
index d79d5e060..4b88de06e 100644
--- a/docs/component/rl/toctree.rst
+++ b/docs/component/rl/toctree.rst
@@ -5,6 +5,7 @@ Reinforcement Learning in Quantitative Trading
 ========================================================================
 
 .. toctree::
+    Guidance <guidance>
     Overall <overall>
     Quick Start <quickstart>
     Framework <framework>
diff --git a/docs/component/workflow.rst b/docs/component/workflow.rst
index 9b84ae4ca..19ba980a1 100644
--- a/docs/component/workflow.rst
+++ b/docs/component/workflow.rst
@@ -53,9 +53,7 @@ Below is a typical config file of ``qrun``.
             kwargs:
                 topk: 50
                 n_drop: 5
-                signal:
-                    - <MODEL>
-                    - <DATASET>
+                signal: <PRED>
         backtest:
             limit_threshold: 0.095
             account: 100000000
@@ -281,9 +279,7 @@ The following script is the configuration of `backtest` and the `strategy` used
             kwargs:
                 topk: 50
                 n_drop: 5
-                signal:
-                    - <MODEL>
-                    - <DATASET>
+                signal: <PRED>
         backtest:
             limit_threshold: 0.095
             account: 100000000
diff --git a/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml b/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml
index ac49d0145..ae2bad5cc 100644
--- a/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml
+++ b/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL>
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml b/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml
index 033d4d22e..b2168a1b8 100644
--- a/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml
+++ b/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL>
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
index a8e89e360..568505ee3 100755
--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
index 3aa8147fc..b345cacd9 100644
--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
index 2eb642741..635611ffa 100644
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml
index bb7c42fd0..c40f0f81a 100644
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158_csi500.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
index 982963eea..136ab7e6f 100644
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360_csi500.yaml b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360_csi500.yaml
index da4962b54..448140702 100644
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360_csi500.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360_csi500.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
index 85cc0a270..58a01d63a 100644
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158_csi500.yaml b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158_csi500.yaml
index b2358c6bf..ea92fbc7c 100644
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158_csi500.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158_csi500.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
index 74db1f362..edb5e960f 100644
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360_csi500.yaml b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360_csi500.yaml
index f10355f22..ec8afefb4 100644
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360_csi500.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha360_csi500.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_early_stop_Alpha158.yaml b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_early_stop_Alpha158.yaml
index b3c38870e..3960aca15 100644
--- a/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_early_stop_Alpha158.yaml
+++ b/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_early_stop_Alpha158.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml b/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
index e056bc845..0710f3181 100644
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
@@ -35,9 +35,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml b/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
index 2effecd61..095e0bade 100644
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
index 7c525c12a..a2f03a230 100755
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml b/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
index 2daaa0136..f5d837a06 100644
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml b/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml
index b3e96f485..cd50b3387 100644
--- a/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml
+++ b/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
@@ -89,4 +87,4 @@ task:
         - class: PortAnaRecord
           module_path: qlib.workflow.record_temp
           kwargs: 
-            config: *port_analysis_config
\ No newline at end of file
+            config: *port_analysis_config
diff --git a/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml b/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml
index 1fc908ea9..838e66064 100644
--- a/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml
+++ b/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL>
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/KRNN/README.md b/examples/benchmarks/KRNN/README.md
new file mode 100644
index 000000000..31af523e6
--- /dev/null
+++ b/examples/benchmarks/KRNN/README.md
@@ -0,0 +1,8 @@
+# KRNN
+* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py)
+
+
+# Introduction to the settings/configs
+* Torch_geometric is used in the original model in FOST, but we didn't use it.
+* Make sure your CUDA version matches the torch version to allow the usage of GPU; we use CUDA==10.2 and `torch.__version__`==1.12.1.
+
diff --git a/examples/benchmarks/KRNN/requirements.txt b/examples/benchmarks/KRNN/requirements.txt
new file mode 100644
index 000000000..87d3b2dda
--- /dev/null
+++ b/examples/benchmarks/KRNN/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.23.4
+pandas==1.5.2
diff --git a/examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml b/examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml
new file mode 100644
index 000000000..b5a3e3bc0
--- /dev/null
+++ b/examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml
@@ -0,0 +1,89 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: KRNN
+        module_path: qlib.contrib.model.pytorch_krnn
+        kwargs:
+            fea_dim: 6
+            cnn_dim: 8
+            cnn_kernel_size: 3
+            rnn_dim: 8
+            rnn_dups: 2
+            rnn_layers: 2
+            n_epochs: 200
+            lr: 0.001
+            early_stop: 20
+            batch_size: 2000
+            metric: loss
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
+
diff --git a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
index bf3738bc0..522f6443c 100755
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
index d550cacb2..e4f9b2fe9 100644
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index b3e138192..1d4ba2b82 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -48,7 +48,6 @@ class Avg15minHandler(DataHandlerLP):
         )
 
     def loader_config(self):
-
         # Results for dataset: df: pd.DataFrame
         #   len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
         #   df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
index 2d441dea9..5ae316801 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL> 
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml
index 327e7fffa..aa017bc9b 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml
@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL> 
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
index 6b58ea4bd..0e63b23f8 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
@@ -33,9 +33,7 @@ port_analysis_config: &port_analysis_config
         kwargs:
             topk: 50
             n_drop: 5
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
     backtest:
         verbose: False
         limit_threshold: 0.095
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
index 053c5bd29..e43a390a2 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml
index 767050919..aa3ac8b5e 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml
index f1ffc45da..7a784a5c8 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_multi_freq.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_multi_freq.yaml
index 11b277ce6..af867a24e 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_multi_freq.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_multi_freq.yaml
@@ -31,9 +31,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml b/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
index 290a8bc42..e65dae250 100644
--- a/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
+++ b/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Linear/workflow_config_linear_Alpha158_csi500.yaml b/examples/benchmarks/Linear/workflow_config_linear_Alpha158_csi500.yaml
index 53e12b999..bff2e6a74 100644
--- a/examples/benchmarks/Linear/workflow_config_linear_Alpha158_csi500.yaml
+++ b/examples/benchmarks/Linear/workflow_config_linear_Alpha158_csi500.yaml
@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml
index 7f5a78e74..e3200f129 100644
--- a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml
+++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha158.yaml
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml
index 9de80a350..39c0093ac 100644
--- a/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml
+++ b/examples/benchmarks/Localformer/workflow_config_localformer_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
index b2012ba8c..6c85546ca 100644
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158_csi500.yaml b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158_csi500.yaml
index 8628898d3..745c9b017 100644
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158_csi500.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158_csi500.yaml
@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
index 359e79202..b9cccd52e 100644
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360_csi500.yaml b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360_csi500.yaml
index 3862295f6..215633463 100644
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360_csi500.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360_csi500.yaml
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md
index 24d3f5902..41799205e 100644
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -26,7 +26,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 
 | Model Name                               | Dataset                             | IC          | ICIR        | Rank IC     | Rank ICIR   | Annualized Return | Information Ratio | Max Drawdown |
 |------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
-| TCN(Shaojie Bai, et al.)                 | Alpha158                            | 0.0275±0.00 | 0.2157±0.01 | 0.0411±0.00 | 0.3379±0.01 | 0.0190±0.02       | 0.2887±0.27       | -0.1202±0.03 |
+| TCN(Shaojie Bai, et al.)                 | Alpha158                            | 0.0279±0.00 | 0.2181±0.01 | 0.0421±0.00 | 0.3429±0.01 | 0.0262±0.02       | 0.4133±0.25       | -0.1090±0.03 |
 | TabNet(Sercan O. Arik, et al.)           | Alpha158                            | 0.0204±0.01 | 0.1554±0.07 | 0.0333±0.00 | 0.2552±0.05 | 0.0227±0.04       | 0.3676±0.54       | -0.1089±0.08 |
 | Transformer(Ashish Vaswani, et al.)      | Alpha158                            | 0.0264±0.00 | 0.2053±0.02 | 0.0407±0.00 | 0.3273±0.02 | 0.0273±0.02       | 0.3970±0.26       | -0.1101±0.02 |
 | GRU(Kyunghyun Cho, et al.)               | Alpha158(with selected 20 features) | 0.0315±0.00 | 0.2450±0.04 | 0.0428±0.00 | 0.3440±0.03 | 0.0344±0.02       | 0.5160±0.25       | -0.1017±0.02 |
@@ -68,6 +68,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | TRA(Hengxu Lin, et al.)                   | Alpha360 | 0.0485±0.00 | 0.3787±0.03 | 0.0587±0.00 | 0.4756±0.03 | 0.0920±0.03       | 1.2789±0.42       | -0.0834±0.02 |
 | IGMTF(Wentao Xu, et al.)                  | Alpha360 | 0.0480±0.00 | 0.3589±0.02 | 0.0606±0.00 | 0.4773±0.01 | 0.0946±0.02       | 1.3509±0.25       | -0.0716±0.02 |
 | HIST(Wentao Xu, et al.)                   | Alpha360 | 0.0522±0.00 | 0.3530±0.01 | 0.0667±0.00 | 0.4576±0.01 | 0.0987±0.02       | 1.3726±0.27       | -0.0681±0.01 |
+| KRNN                                      | Alpha360 | 0.0173±0.01 | 0.1210±0.06 | 0.0270±0.01 | 0.2018±0.04 | -0.0465±0.05      | -0.5415±0.62      | -0.2919±0.13 |
+| Sandwich                                  | Alpha360 | 0.0258±0.00 | 0.1924±0.04 | 0.0337±0.00 | 0.2624±0.03 | 0.0005±0.03       | 0.0001±0.33       | -0.1752±0.05 |
 
 
 - The selected 20 features are based on the feature importance of a lightgbm-based model.
@@ -134,7 +136,7 @@ If you want to contribute your new models, you can follow the steps below.
     - `README.md`: a brief introduction to your models
     - `workflow_config_<model name>_<dataset>.yaml`: a configuration which can read by `qrun`. You are encouraged to run your model in all datasets.
 3. You can integrate your model as a module [in this folder](https://github.com/microsoft/qlib/tree/main/qlib/contrib/model).
-4. Please updated your results in the benchmark tables, e.g. [Alpha360](#alpha158-dataset), [Alpha158](#alpha158-dataset)(the values of each metric are the mean and std calculated based on 20 runs with different random seeds, if you don't have enough computational resource, you can ask for help in the PR).
+4. Please update your results in the above **Benchmark Tables**, e.g. [Alpha360](#alpha158-dataset), [Alpha158](#alpha158-dataset) (the values of each metric are the mean and std calculated based on **20 Runs** with different random seeds). You can accomplish the above operations through the automated [script](https://github.com/microsoft/qlib/blob/main/examples/run_all_model.py#LL286C22-L286C22) provided by Qlib and get the final result in the .md file. If you don't have enough computational resources, you can ask for help in the PR.
 5. Update the info in the index page in the [news list](https://github.com/microsoft/qlib#newspaper-whats-new----sparkling_heart) and [model list](https://github.com/microsoft/qlib#quant-model-paper-zoo).
 
 Finally, you can send PR for review. ([here is an example](https://github.com/microsoft/qlib/pull/1040))
diff --git a/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml b/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
index d750a9980..d992af342 100644
--- a/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
+++ b/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Sandwich/README.md b/examples/benchmarks/Sandwich/README.md
new file mode 100644
index 000000000..26f189a39
--- /dev/null
+++ b/examples/benchmarks/Sandwich/README.md
@@ -0,0 +1,8 @@
+# Sandwich
+* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py)
+
+
+# Introduction to the settings/configs
+* Torch_geometric is used in the original model in FOST, but we didn't use it.
+* Make sure your CUDA version matches the torch version to allow the usage of GPU; we use CUDA==10.2 and `torch.__version__`==1.12.1.
+
diff --git a/examples/benchmarks/Sandwich/requirements.txt b/examples/benchmarks/Sandwich/requirements.txt
new file mode 100644
index 000000000..87d3b2dda
--- /dev/null
+++ b/examples/benchmarks/Sandwich/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.23.4
+pandas==1.5.2
diff --git a/examples/benchmarks/Sandwich/workflow_config_sandwich_Alpha360.yaml b/examples/benchmarks/Sandwich/workflow_config_sandwich_Alpha360.yaml
new file mode 100644
index 000000000..29e67d67e
--- /dev/null
+++ b/examples/benchmarks/Sandwich/workflow_config_sandwich_Alpha360.yaml
@@ -0,0 +1,91 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: Sandwich
+        module_path: qlib.contrib.model.pytorch_sandwich
+        kwargs:
+            fea_dim: 6
+            cnn_dim_1: 16
+            cnn_dim_2: 16
+            cnn_kernel_size: 3
+            rnn_dim_1: 8
+            rnn_dim_2: 8
+            rnn_dups: 2
+            rnn_layers: 2
+            n_epochs: 200
+            lr: 0.001
+            early_stop: 20
+            batch_size: 2000
+            metric: loss
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
+
diff --git a/examples/benchmarks/TCN/workflow_config_tcn_Alpha158.yaml b/examples/benchmarks/TCN/workflow_config_tcn_Alpha158.yaml
index c6f663f94..dcb7508a4 100755
--- a/examples/benchmarks/TCN/workflow_config_tcn_Alpha158.yaml
+++ b/examples/benchmarks/TCN/workflow_config_tcn_Alpha158.yaml
@@ -36,8 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL>
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TCN/workflow_config_tcn_Alpha360.yaml b/examples/benchmarks/TCN/workflow_config_tcn_Alpha360.yaml
index e383662fc..4756a93b2 100644
--- a/examples/benchmarks/TCN/workflow_config_tcn_Alpha360.yaml
+++ b/examples/benchmarks/TCN/workflow_config_tcn_Alpha360.yaml
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            model: <MODEL>
-            dataset: <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml b/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
index 460a470bb..7adf97582 100644
--- a/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
+++ b/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
@@ -30,9 +30,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
@@ -95,4 +93,4 @@ task:
         - class: PortAnaRecord
           module_path: qlib.workflow.record_temp
           kwargs: 
-            config: *port_analysis_config
\ No newline at end of file
+            config: *port_analysis_config
diff --git a/examples/benchmarks/TFT/data_formatters/base.py b/examples/benchmarks/TFT/data_formatters/base.py
index 9df0448ba..9cdce6382 100644
--- a/examples/benchmarks/TFT/data_formatters/base.py
+++ b/examples/benchmarks/TFT/data_formatters/base.py
@@ -139,7 +139,6 @@ class GenericDataFormatter(abc.ABC):
         # Sanity checks first.
         # Ensure only one ID and time column exist
         def _check_single_column(input_type):
-
             length = len([tup for tup in column_definition if tup[2] == input_type])
 
             if length != 1:
diff --git a/examples/benchmarks/TFT/expt_settings/configs.py b/examples/benchmarks/TFT/expt_settings/configs.py
index 62aa68c38..55eb32a0b 100644
--- a/examples/benchmarks/TFT/expt_settings/configs.py
+++ b/examples/benchmarks/TFT/expt_settings/configs.py
@@ -78,7 +78,6 @@ class ExperimentConfig:
 
     @property
     def hyperparam_iterations(self):
-
         return 240 if self.experiment == "volatility" else 60
 
     def make_data_formatter(self):
diff --git a/examples/benchmarks/TFT/libs/hyperparam_opt.py b/examples/benchmarks/TFT/libs/hyperparam_opt.py
index e18f5b716..86f587d7d 100644
--- a/examples/benchmarks/TFT/libs/hyperparam_opt.py
+++ b/examples/benchmarks/TFT/libs/hyperparam_opt.py
@@ -88,7 +88,6 @@ class HyperparamOptManager:
         params_file = os.path.join(self.hyperparam_folder, "params.csv")
 
         if os.path.exists(results_file) and os.path.exists(params_file):
-
             self.results = pd.read_csv(results_file, index_col=0)
             self.saved_params = pd.read_csv(params_file, index_col=0)
 
@@ -178,7 +177,6 @@ class HyperparamOptManager:
             return parameters
 
         for _ in range(self._max_tries):
-
             parameters = _get_next()
             name = self._get_name(parameters)
 
diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py
index aa055e294..2a1a2fa15 100644
--- a/examples/benchmarks/TFT/libs/tft_model.py
+++ b/examples/benchmarks/TFT/libs/tft_model.py
@@ -475,7 +475,6 @@ class TemporalFusionTransformer:
 
         embeddings = []
         for i in range(num_categorical_variables):
-
             embedding = tf.keras.Sequential(
                 [
                     tf.keras.layers.InputLayer([time_steps]),
@@ -680,7 +679,6 @@ class TemporalFusionTransformer:
 
         data_map = {}
         for _, sliced in data.groupby(id_col):
-
             col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
 
             for k in col_mappings:
@@ -954,7 +952,6 @@ class TemporalFusionTransformer:
         """
 
         with tf.variable_scope(self.name):
-
             transformer_layer, all_inputs, attention_components = self._build_base_graph()
 
             outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(
diff --git a/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml b/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
index d83878e3e..e925fb772 100644
--- a/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
+++ b/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
@@ -16,9 +16,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TRA/example.py b/examples/benchmarks/TRA/example.py
index defacf412..0d52c8775 100644
--- a/examples/benchmarks/TRA/example.py
+++ b/examples/benchmarks/TRA/example.py
@@ -6,7 +6,6 @@ from qlib.utils import init_instance_by_config
 
 
 def main(seed, config_file="configs/config_alstm.yaml"):
-
     # set random seed
     with open(config_file) as f:
         config = yaml.safe_load(f)
@@ -30,7 +29,6 @@ def main(seed, config_file="configs/config_alstm.yaml"):
 
 
 if __name__ == "__main__":
-
     # set params from cmd
     parser = argparse.ArgumentParser(allow_abbrev=False)
     parser.add_argument("--seed", type=int, default=1000, help="random seed")
diff --git a/examples/benchmarks/TRA/src/dataset.py b/examples/benchmarks/TRA/src/dataset.py
index 6740b1cbd..de4b2ad41 100644
--- a/examples/benchmarks/TRA/src/dataset.py
+++ b/examples/benchmarks/TRA/src/dataset.py
@@ -96,7 +96,6 @@ class MTSDatasetH(DatasetH):
         drop_last=False,
         **kwargs,
     ):
-
         assert horizon > 0, "please specify `horizon` to avoid data leakage"
 
         self.seq_len = seq_len
@@ -111,7 +110,6 @@ class MTSDatasetH(DatasetH):
         super().__init__(handler, segments, **kwargs)
 
     def setup_data(self, handler_kwargs: dict = None, **kwargs):
-
         super().setup_data()
 
         # change index to <code, date>
diff --git a/examples/benchmarks/TRA/src/model.py b/examples/benchmarks/TRA/src/model.py
index cff94388e..affb115a1 100644
--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -45,7 +45,6 @@ class TRAModel(Model):
         avg_params=True,
         **kwargs,
     ):
-
         np.random.seed(seed)
         torch.manual_seed(seed)
 
@@ -93,7 +92,6 @@ class TRAModel(Model):
         self.global_step = -1
 
     def train_epoch(self, data_set):
-
         self.model.train()
         self.tra.train()
 
@@ -146,7 +144,6 @@ class TRAModel(Model):
         return total_loss
 
     def test_epoch(self, data_set, return_pred=False):
-
         self.model.eval()
         self.tra.eval()
         data_set.eval()
@@ -204,7 +201,6 @@ class TRAModel(Model):
         return metrics, preds
 
     def fit(self, dataset, evals_result=dict()):
-
         train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
 
         best_score = -1
@@ -380,7 +376,6 @@ class LSTM(nn.Module):
             self.output_size = hidden_size
 
     def forward(self, x):
-
         x = self.input_drop(x)
 
         if self.training and self.noise_level > 0:
@@ -464,7 +459,6 @@ class Transformer(nn.Module):
         self.output_size = hidden_size
 
     def forward(self, x):
-
         x = self.input_drop(x)
 
         if self.training and self.noise_level > 0:
@@ -514,7 +508,6 @@ class TRA(nn.Module):
         self.predictors = nn.Linear(input_size, num_states)
 
     def forward(self, hidden, hist_loss):
-
         preds = self.predictors(hidden)
 
         if self.num_states == 1:
diff --git a/examples/benchmarks/TRA/workflow_config_tra_Alpha158.yaml b/examples/benchmarks/TRA/workflow_config_tra_Alpha158.yaml
index c86f87fc6..02c4ecac3 100644
--- a/examples/benchmarks/TRA/workflow_config_tra_Alpha158.yaml
+++ b/examples/benchmarks/TRA/workflow_config_tra_Alpha158.yaml
@@ -57,9 +57,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TRA/workflow_config_tra_Alpha158_full.yaml b/examples/benchmarks/TRA/workflow_config_tra_Alpha158_full.yaml
index 75f18f3ee..9ccf56e86 100644
--- a/examples/benchmarks/TRA/workflow_config_tra_Alpha158_full.yaml
+++ b/examples/benchmarks/TRA/workflow_config_tra_Alpha158_full.yaml
@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TRA/workflow_config_tra_Alpha360.yaml b/examples/benchmarks/TRA/workflow_config_tra_Alpha360.yaml
index 9ab5b904b..29686d7da 100644
--- a/examples/benchmarks/TRA/workflow_config_tra_Alpha360.yaml
+++ b/examples/benchmarks/TRA/workflow_config_tra_Alpha360.yaml
@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
index d9b94e86c..7549688b9 100644
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
index 830943d6b..7155d25b1 100644
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml
index e36d44c43..ce5105108 100644
--- a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml
+++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha158.yaml
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml
index cab46a4d4..35342de94 100644
--- a/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml
+++ b/examples/benchmarks/Transformer/workflow_config_transformer_Alpha360.yaml
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml
index 5ee38cf70..0c7f55d02 100644
--- a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml
+++ b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha158.yaml
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml
index 7c98bd40c..8e7b54372 100644
--- a/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml
+++ b/examples/benchmarks/XGBoost/workflow_config_xgboost_Alpha360.yaml
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
         class: TopkDropoutStrategy
         module_path: qlib.contrib.strategy
         kwargs:
-            signal:
-                - <MODEL> 
-                - <DATASET>
+            signal: <PRED>
             topk: 50
             n_drop: 5
     backtest:
diff --git a/examples/benchmarks_dynamic/DDG-DA/Makefile b/examples/benchmarks_dynamic/DDG-DA/Makefile
new file mode 100644
index 000000000..c6cf5206e
--- /dev/null
+++ b/examples/benchmarks_dynamic/DDG-DA/Makefile
@@ -0,0 +1,4 @@
+.PHONY: clean
+
+clean:
+	-rm -r *.pkl mlruns || true
diff --git a/examples/benchmarks_dynamic/DDG-DA/README.md b/examples/benchmarks_dynamic/DDG-DA/README.md
index 4d49315bd..ac4349d91 100644
--- a/examples/benchmarks_dynamic/DDG-DA/README.md
+++ b/examples/benchmarks_dynamic/DDG-DA/README.md
@@ -16,12 +16,12 @@ Though the dataset is different, the conclusion remains the same. By applying `D
 # Run the Code
 Users can try `DDG-DA` by running the following command:
 ```bash
-    python workflow.py run_all
+    python workflow.py run
 ```
 
 The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
 ```bash
-    python workflow.py --forecast_model="gbdt" run_all
+    python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run
 ```
 
 # Results
diff --git a/examples/benchmarks_dynamic/DDG-DA/workflow.py b/examples/benchmarks_dynamic/DDG-DA/workflow.py
index b69107549..7593fe374 100644
--- a/examples/benchmarks_dynamic/DDG-DA/workflow.py
+++ b/examples/benchmarks_dynamic/DDG-DA/workflow.py
@@ -1,302 +1,40 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 from pathlib import Path
-from qlib.model.meta.task import MetaTask
-from qlib.contrib.meta.data_selection.model import MetaModelDS
-from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
-from qlib.data.dataset.handler import DataHandlerLP
+from typing import Union
 
-import pandas as pd
 import fire
-import sys
-import pickle
-from typing import Optional
+
 from qlib import auto_init
-from qlib.model.trainer import TrainerR
-from qlib.typehint import Literal
-from qlib.utils import init_instance_by_config
-from qlib.workflow import R
+from qlib.contrib.rolling.ddgda import DDGDA
 from qlib.tests.data import GetData
 
 DIRNAME = Path(__file__).absolute().resolve().parent
-sys.path.append(str(DIRNAME.parent / "baseline"))
-from rolling_benchmark import RollingBenchmark  # NOTE: sys.path is changed for import RollingBenchmark
+BENCH_DIR = DIRNAME.parent / "baseline"
 
 
-class DDGDA:
-    """
-    please run `python workflow.py run_all` to run the full workflow of the experiment
+class DDGDABench(DDGDA):
+    # The config in the README.md
+    CONF_LIST = [
+        BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
+        BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
+    ]
 
-    **NOTE**
-    before running the example, please clean your previous results with following command
-    - `rm -r mlruns`
-    """
+    DEFAULT_CONF = CONF_LIST[0]  # Linear by default due to efficiency
 
-    def __init__(
-        self,
-        sim_task_model: Literal["linear", "gbdt"] = "linear",
-        forecast_model: Literal["linear", "gbdt"] = "linear",
-        h_path: Optional[str] = None,
-        test_end: Optional[str] = None,
-        train_start: Optional[str] = None,
-        meta_1st_train_end: Optional[str] = None,
-        task_ext_conf: Optional[dict] = None,
-        alpha: float = 0.0,
-        proxy_hd: str = "handler_proxy.pkl",
-    ):
-        """
+    def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
+        # This code is for being compatible with the previous old code
+        conf_path = Path(conf_path)
+        super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)
 
-        Parameters
-        ----------
-
-        train_start: Optional[str]
-            the start datetime for data.  It is used in training start time (for both tasks & meta learing)
-        test_end: Optional[str]
-            the end datetime for data. It is used in test end time
-        meta_1st_train_end: Optional[str]
-            the datetime of training end of the first meta_task
-        alpha: float
-            Setting the L2 regularization for ridge
-            The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
-        """
-        self.step = 20
-        # NOTE:
-        # the horizon must match the meaning in the base task template
-        self.horizon = 20
-        self.meta_exp_name = "DDG-DA"
-        self.sim_task_model = sim_task_model  # The model to capture the distribution of data.
-        self.forecast_model = forecast_model  # downstream forecasting models' type
-        self.rb_kwargs = {
-            "h_path": h_path,
-            "test_end": test_end,
-            "train_start": train_start,
-            "task_ext_conf": task_ext_conf,
-        }
-        self.alpha = alpha
-        self.meta_1st_train_end = meta_1st_train_end
-        self.proxy_hd = proxy_hd
-
-    def get_feature_importance(self):
-        # this must be lightGBM, because it needs to get the feature importance
-        rb = RollingBenchmark(model_type="gbdt", **self.rb_kwargs)
-        task = rb.basic_task()
-
-        with R.start(experiment_name="feature_importance"):
-            model = init_instance_by_config(task["model"])
-            dataset = init_instance_by_config(task["dataset"])
-            model.fit(dataset)
-
-        fi = model.get_feature_importance()
-
-        # Because the model use numpy instead of dataframe for training lightgbm
-        # So the we must use following extra steps to get the right feature importance
-        df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
-        cols = df.columns
-        fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
-
-        return pd.Series(fi_named)
-
-    def dump_data_for_proxy_model(self):
-        """
-        Dump data for training meta model.
-        The meta model will be trained upon the proxy forecasting model.
-        This dataset is for the proxy forecasting model.
-        """
-        topk = 30
-        fi = self.get_feature_importance()
-        col_selected = fi.nlargest(topk)
-
-        rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
-        task = rb.basic_task()
-        dataset = init_instance_by_config(task["dataset"])
-        prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
-
-        feature_df = prep_ds["feature"]
-        label_df = prep_ds["label"]
-
-        feature_selected = feature_df.loc[:, col_selected.index]
-
-        feature_selected = feature_selected.groupby("datetime").apply(lambda df: (df - df.mean()).div(df.std()))
-        feature_selected = feature_selected.fillna(0.0)
-
-        df_all = {
-            "label": label_df.reindex(feature_selected.index),
-            "feature": feature_selected,
-        }
-        df_all = pd.concat(df_all, axis=1)
-        df_all.to_pickle(DIRNAME / "fea_label_df.pkl")
-
-        # dump data in handler format for aligning the interface
-        handler = DataHandlerLP(
-            data_loader={
-                "class": "qlib.data.dataset.loader.StaticDataLoader",
-                "kwargs": {"config": DIRNAME / "fea_label_df.pkl"},
-            }
-        )
-        handler.to_pickle(DIRNAME / self.proxy_hd, dump_all=True)
-
-    @property
-    def _internal_data_path(self):
-        return DIRNAME / f"internal_data_s{self.step}.pkl"
-
-    def dump_meta_ipt(self):
-        """
-        Dump data for training meta model.
-        This function will dump the input data for meta model
-        """
-        # According to the experiments, the choice of the model type is very important for achieving good results
-        rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
-        sim_task = rb.basic_task()
-
-        if self.sim_task_model == "gbdt":
-            sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
-
-        exp_name_sim = f"data_sim_s{self.step}"
-
-        internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
-        internal_data.setup(trainer=TrainerR)
-
-        with self._internal_data_path.open("wb") as f:
-            pickle.dump(internal_data, f)
-
-    def train_meta_model(self, fill_method="max"):
-        """
-        training a meta model based on a simplified linear proxy model;
-        """
-
-        # 1) leverage the simplified proxy forecasting model to train meta model.
-        # - Only the dataset part is important, in current version of meta model will integrate the
-        rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
-        sim_task = rb.basic_task()
-        train_start = self.rb_kwargs.get("train_start", "2008-01-01")
-        train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
-        test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
-        proxy_forecast_model_task = {
-            # "model": "qlib.contrib.model.linear.LinearModel",
-            "dataset": {
-                "class": "qlib.data.dataset.DatasetH",
-                "kwargs": {
-                    "handler": f"file://{(DIRNAME / self.proxy_hd).absolute()}",
-                    "segments": {
-                        "train": (train_start, train_end),
-                        "test": (test_start, sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
-                    },
-                },
-            },
-            # "record": ["qlib.workflow.record_temp.SignalRecord"]
-        }
-        # the proxy_forecast_model_task will be used to create meta tasks.
-        # The test date of first task will be 2011-01-01. Each test segment will be about 20days
-        # The tasks include all training tasks and test tasks.
-
-        # 2) preparing meta dataset
-        kwargs = dict(
-            task_tpl=proxy_forecast_model_task,
-            step=self.step,
-            segments=0.62,  # keep test period consistent with the dataset yaml
-            trunc_days=1 + self.horizon,
-            hist_step_n=30,
-            fill_method=fill_method,
-            rolling_ext_days=0,
-        )
-        # NOTE:
-        # the input of meta model (internal data) are shared between proxy model and final forecasting model
-        # but their task test segment are not aligned! It worked in my previous experiment.
-        # So the misalignment will not affect the effectiveness of the method.
-        with self._internal_data_path.open("rb") as f:
-            internal_data = pickle.load(f)
-
-        md = MetaDatasetDS(exp_name=internal_data, **kwargs)
-
-        # 3) train and logging meta model
-        with R.start(experiment_name=self.meta_exp_name):
-            R.log_params(**kwargs)
-            mm = MetaModelDS(
-                step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=100, seed=43, alpha=self.alpha
-            )
-            mm.fit(md)
-            R.save_objects(model=mm)
-
-    @property
-    def _task_path(self):
-        return DIRNAME / f"tasks_s{self.step}.pkl"
-
-    def meta_inference(self):
-        """
-        Leverage meta-model for inference:
-        - Given
-            - baseline tasks
-            - input for meta model(internal data)
-            - meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
-        """
-        # 1) get meta model
-        exp = R.get_exp(experiment_name=self.meta_exp_name)
-        rec = exp.list_recorders(rtype=exp.RT_L)[0]
-        meta_model: MetaModelDS = rec.load_object("model")
-
-        # 2)
-        # we are transfer to knowledge of meta model to final forecasting tasks.
-        # Create MetaTaskDataset for the final forecasting tasks
-        # Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
-
-        # 2.1) get previous config
-        param = rec.list_params()
-        trunc_days = int(param["trunc_days"])
-        step = int(param["step"])
-        hist_step_n = int(param["hist_step_n"])
-        fill_method = param.get("fill_method", "max")
-
-        rb = RollingBenchmark(model_type=self.forecast_model, **self.rb_kwargs)
-        task_l = rb.create_rolling_tasks()
-
-        # 2.2) create meta dataset for final dataset
-        kwargs = dict(
-            task_tpl=task_l,
-            step=step,
-            segments=0.0,  # all the tasks are for testing
-            trunc_days=trunc_days,
-            hist_step_n=hist_step_n,
-            fill_method=fill_method,
-            task_mode=MetaTask.PROC_MODE_TRANSFER,
-        )
-
-        with self._internal_data_path.open("rb") as f:
-            internal_data = pickle.load(f)
-        mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
-
-        # 3) meta model make inference and get new qlib task
-        new_tasks = meta_model.inference(mds)
-        with self._task_path.open("wb") as f:
-            pickle.dump(new_tasks, f)
-
-    def train_and_eval_tasks(self):
-        """
-        Training the tasks generated by meta model
-        Then evaluate it
-        """
-        with self._task_path.open("rb") as f:
-            tasks = pickle.load(f)
-        rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.forecast_model, **self.rb_kwargs)
-        rb.train_rolling_tasks(tasks)
-        rb.ens_rolling()
-        rb.update_rolling_rec()
-
-    def run_all(self):
-        # 1) file: handler_proxy.pkl (self.proxy_hd)
-        self.dump_data_for_proxy_model()
-        # 2)
-        # file: internal_data_s20.pkl
-        # mlflow: data_sim_s20, models for calculating meta_ipt
-        self.dump_meta_ipt()
-        # 3) meta model will be stored in `DDG-DA`
-        self.train_meta_model()
-        # 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
-        self.meta_inference()
-        # 5) load the saved tasks and train model
-        self.train_and_eval_tasks()
+        for f in self.CONF_LIST:
+            if conf_path.samefile(f):
+                break
+        else:
+            self.logger.warning("Model type is not in the benchmark!")
 
 
 if __name__ == "__main__":
     GetData().qlib_data(exists_skip=True)
     auto_init()
-    fire.Fire(DDGDA)
+    fire.Fire(DDGDABench)
diff --git a/examples/benchmarks_dynamic/README.md b/examples/benchmarks_dynamic/README.md
index 261fcc035..6f78fa71a 100644
--- a/examples/benchmarks_dynamic/README.md
+++ b/examples/benchmarks_dynamic/README.md
@@ -8,15 +8,17 @@ The table below shows the performances of different solutions on different forec
 Here is the [crowd sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases
 ```bash
 wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
+mkdir -p ~/.qlib/qlib_data/cn_data
 tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
+rm -f qlib_bin.tar.gz
 ```
 
 | Model Name       | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
-|------------------|---------|----|------|---------|-----------|-------------------|-------------------|--------------|
-| RR[Linear]       |Alpha158 |0.089|0.577|0.102    |0.627      |0.093              |1.458              |-0.073        |
-| DDG-DA[Linear]   |Alpha158 |0.096|0.636|0.107    |0.677      |0.067              |0.996              |-0.091        |
-| RR[LightGBM]     |Alpha158 |0.082|0.589|0.091    |0.626      |0.077              |1.320              |-0.091        |
-| DDG-DA[LightGBM] |Alpha158 |0.085|0.658|0.094    |0.686      |0.115              |1.792              |-0.068        |
+|------------------|---------|------|------|---------|-----------|-------------------|-------------------|--------------|
+| RR[Linear]       |Alpha158 |0.0945|0.5989|0.1069   |0.6495     |0.0857             |1.3682             |-0.0986       |
+| DDG-DA[Linear]   |Alpha158 |0.0983|0.6157|0.1108   |0.6646     |0.0764             |1.1904             |-0.0769       |
+| RR[LightGBM]     |Alpha158 |0.0816|0.5887|0.0912   |0.6263     |0.0771             |1.3196             |-0.0909       |
+| DDG-DA[LightGBM] |Alpha158 |0.0878|0.6185|0.0975   |0.6524     |0.1261             |2.0096             |-0.0744       |
 
 - The label horizon of the `Alpha158` dataset is set to 20.
 - The rolling time intervals are set to 20 trading days.
diff --git a/examples/benchmarks_dynamic/baseline/README.md b/examples/benchmarks_dynamic/baseline/README.md
index 17e10482d..f17651412 100644
--- a/examples/benchmarks_dynamic/baseline/README.md
+++ b/examples/benchmarks_dynamic/baseline/README.md
@@ -5,11 +5,12 @@ This is the framework of periodically Rolling Retrain (RR) forecasting models. R
 ## Run the Code
 Users can try RR by running the following command:
 ```bash
-    python rolling_benchmark.py run_all
+    python rolling_benchmark.py run
 ```
 
 The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
 For example, users can try `LightGBM` forecasting models by running the following command:
 ```bash
-    python rolling_benchmark.py --model_type="gbdt" run_all
-```
\ No newline at end of file
+    python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run
+
+```
diff --git a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
index d452957d4..1ce30ef8a 100644
--- a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
+++ b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
@@ -1,160 +1,33 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-from typing import Optional
-from qlib.model.ens.ensemble import RollingEnsemble
-from qlib.utils import init_instance_by_config
-import fire
-import yaml
-import pandas as pd
-from qlib import auto_init
 from pathlib import Path
-from tqdm.auto import tqdm
-from qlib.model.trainer import TrainerR
-from qlib.log import get_module_logger
-from qlib.utils.data import update_config
-from qlib.workflow import R
+from typing import Union
+
+import fire
+
+from qlib import auto_init
+from qlib.contrib.rolling.base import Rolling
 from qlib.tests.data import GetData
 
 DIRNAME = Path(__file__).absolute().resolve().parent
-from qlib.workflow.task.gen import task_generator, RollingGen
-from qlib.workflow.task.collect import RecorderCollector
-from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord
 
 
-class RollingBenchmark:
-    """
-    **NOTE**
-    before running the example, please clean your previous results with following command
-    - `rm -r mlruns`
+class RollingBenchmark(Rolling):
+    # The configs referenced in README.md
+    CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"]
 
-    """
+    DEFAULT_CONF = CONF_LIST[0]
 
-    def __init__(
-        self,
-        rolling_exp: str = "rolling_models",
-        model_type: str = "linear",
-        h_path: Optional[str] = None,
-        train_start: Optional[str] = None,
-        test_end: Optional[str] = None,
-        task_ext_conf: Optional[dict] = None,
-    ) -> None:
-        """
-        Parameters
-        ----------
-        rolling_exp : str
-            The name for the experiments for rolling
-        model_type : str
-            The model to be boosted.
-        h_path : Optional[str]
-            the dumped data handler;
-        test_end : Optional[str]
-            the test end for the data. It is typically used together with the handler
-        train_start : Optional[str]
-            the train start for the data.  It is typically used together with the handler.
-        task_ext_conf : Optional[dict]
-            some option to update the
-        """
-        self.step = 20
-        self.horizon = 20
-        self.rolling_exp = rolling_exp
-        self.model_type = model_type
-        self.h_path = h_path
-        self.train_start = train_start
-        self.test_end = test_end
-        self.logger = get_module_logger("RollingBenchmark")
-        self.task_ext_conf = task_ext_conf
+    def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
+        # This is kept for compatibility with the previous (older) code
+        conf_path = Path(conf_path)
+        super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)
 
-    def basic_task(self):
-        """For fast training rolling"""
-        if self.model_type == "gbdt":
-            conf_path = DIRNAME.parent.parent / "benchmarks" / "LightGBM" / "workflow_config_lightgbm_Alpha158.yaml"
-            # dump the processed data on to disk for later loading to speed up the processing
-            h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
-        elif self.model_type == "linear":
-            conf_path = DIRNAME.parent.parent / "benchmarks" / "Linear" / "workflow_config_linear_Alpha158.yaml"
-            h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
+        for f in self.CONF_LIST:
+            if conf_path.samefile(f):
+                break
         else:
-            raise AssertionError("Model type is not supported!")
-
-        if self.h_path is not None:
-            h_path = Path(self.h_path)
-
-        with conf_path.open("r") as f:
-            conf = yaml.safe_load(f)
-
-        # modify dataset horizon
-        conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
-            "Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
-        ]
-
-        task = conf["task"]
-
-        if self.task_ext_conf is not None:
-            task = update_config(task, self.task_ext_conf)
-
-        if not h_path.exists():
-            h_conf = task["dataset"]["kwargs"]["handler"]
-            h = init_instance_by_config(h_conf)
-            h.to_pickle(h_path, dump_all=True)
-
-        task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
-        task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
-
-        if self.train_start is not None:
-            seg = task["dataset"]["kwargs"]["segments"]["train"]
-            task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
-
-        if self.test_end is not None:
-            seg = task["dataset"]["kwargs"]["segments"]["test"]
-            task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
-        self.logger.info(task)
-        return task
-
-    def create_rolling_tasks(self):
-        task = self.basic_task()
-        task_l = task_generator(
-            task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
-        )  # the last two days should be truncated to avoid information leakage
-        return task_l
-
-    def train_rolling_tasks(self, task_l=None):
-        if task_l is None:
-            task_l = self.create_rolling_tasks()
-        trainer = TrainerR(experiment_name=self.rolling_exp)
-        trainer(task_l)
-
-    COMB_EXP = "rolling"
-
-    def ens_rolling(self):
-        rc = RecorderCollector(
-            experiment=self.rolling_exp,
-            artifacts_key=["pred", "label"],
-            process_list=[RollingEnsemble()],
-            # rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
-            artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
-        )
-        res = rc()
-        with R.start(experiment_name=self.COMB_EXP):
-            R.log_params(exp_name=self.rolling_exp)
-            R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
-
-    def update_rolling_rec(self):
-        """
-        Evaluate the combined rolling results
-        """
-        for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
-            for rt_cls in SigAnaRecord, PortAnaRecord:
-                rt = rt_cls(recorder=rec, skip_existing=True)
-                rt.generate()
-        print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
-
-    def run_all(self):
-        # the results will be  save in mlruns.
-        # 1) each rolling task is saved in rolling_models
-        self.train_rolling_tasks()
-        # 2) combined rolling tasks and evaluation results are saved in rolling
-        self.ens_rolling()
-        self.update_rolling_rec()
+            self.logger.warning("Model type is not in the benchmark!")
 
 
 if __name__ == "__main__":
diff --git a/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml b/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml
new file mode 100644
index 000000000..5ae316801
--- /dev/null
+++ b/examples/benchmarks_dynamic/baseline/workflow_config_lightgbm_Alpha158.yaml
@@ -0,0 +1,71 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
diff --git a/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml b/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml
new file mode 100644
index 000000000..a5c272f28
--- /dev/null
+++ b/examples/benchmarks_dynamic/baseline/workflow_config_linear_Alpha158.yaml
@@ -0,0 +1,77 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: LinearModel
+        module_path: qlib.contrib.model.linear
+        kwargs:
+            estimator: ridge
+            alpha: 0.05
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: True
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
diff --git a/examples/highfreq/highfreq_handler.py b/examples/highfreq/highfreq_handler.py
index c15c3ec41..7df564b7b 100644
--- a/examples/highfreq/highfreq_handler.py
+++ b/examples/highfreq/highfreq_handler.py
@@ -14,7 +14,6 @@ class HighFreqHandler(DataHandlerLP):
         fit_end_time=None,
         drop_raw=True,
     ):
-
         infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
         learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
 
diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py
index c631d72e7..02948c5a1 100644
--- a/examples/highfreq/workflow.py
+++ b/examples/highfreq/workflow.py
@@ -18,7 +18,6 @@ from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Se
 
 
 class HighfreqWorkflow:
-
     SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
 
     MARKET = "all"
diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py
index 8c3e9f3e8..7520390a6 100644
--- a/examples/hyperparameter/LightGBM/hyperparameter_158.py
+++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py
@@ -35,7 +35,6 @@ def objective(trial):
 
 
 if __name__ == "__main__":
-
     provider_uri = "~/.qlib/qlib_data/cn_data"
     GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
     qlib.init(provider_uri=provider_uri, region="cn")
diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py
index 322c0fa42..7ba28c78f 100644
--- a/examples/hyperparameter/LightGBM/hyperparameter_360.py
+++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py
@@ -38,7 +38,6 @@ def objective(trial):
 
 
 if __name__ == "__main__":
-
     provider_uri = "~/.qlib/qlib_data/cn_data"
     GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
     qlib.init(provider_uri=provider_uri, region=REG_CN)
diff --git a/examples/model_interpreter/feature.py b/examples/model_interpreter/feature.py
index bfc58fc84..8ad673d0e 100644
--- a/examples/model_interpreter/feature.py
+++ b/examples/model_interpreter/feature.py
@@ -11,7 +11,6 @@ from qlib.tests.config import CSI300_GBDT_TASK
 
 
 if __name__ == "__main__":
-
     # use default data
     provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
     GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
diff --git a/examples/portfolio/prepare_riskdata.py b/examples/portfolio/prepare_riskdata.py
index 3168e2f37..e502a1ff7 100644
--- a/examples/portfolio/prepare_riskdata.py
+++ b/examples/portfolio/prepare_riskdata.py
@@ -9,7 +9,6 @@ from qlib.model.riskmodel import StructuredCovEstimator
 
 
 def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
-
     universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
 
     price_all = (
@@ -20,7 +19,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
     riskmodel = StructuredCovEstimator()
 
     for i in range(T - 1, len(price_all)):
-
         date = price_all.index[i]
         ref_date = price_all.index[i - T + 1]
 
@@ -47,7 +45,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
 
 
 if __name__ == "__main__":
-
     import qlib
 
     qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
diff --git a/examples/rolling_process_data/workflow.py b/examples/rolling_process_data/workflow.py
index 434d365e5..d1c03866a 100644
--- a/examples/rolling_process_data/workflow.py
+++ b/examples/rolling_process_data/workflow.py
@@ -13,7 +13,6 @@ from qlib.tests.data import GetData
 
 
 class RollingDataWorkflow:
-
     MARKET = "csi300"
     start_time = "2010-01-01"
     end_time = "2019-12-31"
@@ -93,7 +92,6 @@ class RollingDataWorkflow:
         dataset = init_instance_by_config(dataset_config)
 
         for rolling_offset in range(self.rolling_cnt):
-
             print(f"===========rolling{rolling_offset} start===========")
             if rolling_offset:
                 dataset.config(
diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py
index 0c4d73a51..94de5c082 100644
--- a/examples/workflow_by_code.py
+++ b/examples/workflow_by_code.py
@@ -17,7 +17,6 @@ from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
 
 
 if __name__ == "__main__":
-
     # use default data
     provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
     GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
diff --git a/qlib/__init__.py b/qlib/__init__.py
index 11d22cc23..3355ac04f 100644
--- a/qlib/__init__.py
+++ b/qlib/__init__.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 from pathlib import Path
 
-__version__ = "0.9.1.99"
+__version__ = "0.9.2.99"
 __version__bak = __version__  # This version is backup for QlibConfig.reset_qlib_version
 import os
 from typing import Union
@@ -77,7 +77,6 @@ def init(default_conf="client", **kwargs):
 
 
 def _mount_nfs_uri(provider_uri, mount_path, auto_mount: bool = False):
-
     LOG = get_module_logger("mount nfs", level=logging.INFO)
     if mount_path is None:
         raise ValueError(f"Invalid mount path: {mount_path}!")
diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index bb8ca731b..d784aed57 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -182,7 +182,6 @@ def get_strategy_executor(
     exchange_kwargs: dict = {},
     pos_type: str = "Position",
 ) -> Tuple[BaseStrategy, BaseExecutor]:
-
     # NOTE:
     # - for avoiding recursive import
     # - typing annotations is not reliable
diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py
index a752a9f8c..1ab0d07a7 100644
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -638,7 +638,6 @@ class Exchange:
         random.seed(0)
         random.shuffle(sorted_ids)
         for stock_id in sorted_ids:
-
             # Do not generate order for the non-tradable stocks
             if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
                 continue
diff --git a/qlib/config.py b/qlib/config.py
index 7b726c658..7910dab73 100644
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -293,7 +293,6 @@ class QlibConfig(Config):
         """
 
         def __init__(self, provider_uri: Union[str, Path, dict], mount_path: Union[str, Path, dict]):
-
             """
             The relation of `provider_uri` and `mount_path`
             - `mount_path` is used only if provider_uri is an NFS path
diff --git a/qlib/contrib/data/dataset.py b/qlib/contrib/data/dataset.py
index 9ce522cc0..8b40dba1f 100644
--- a/qlib/contrib/data/dataset.py
+++ b/qlib/contrib/data/dataset.py
@@ -130,7 +130,6 @@ class MTSDatasetH(DatasetH):
         input_size=None,
         **kwargs,
     ):
-
         assert num_states == 0 or horizon > 0, "please specify `horizon` to avoid data leakage"
         assert memory_mode in ["sample", "daily"], "unsupported memory mode"
         assert memory_mode == "sample" or batch_size < 0, "daily memory requires daily sampling (`batch_size < 0`)"
@@ -153,7 +152,6 @@ class MTSDatasetH(DatasetH):
         super().__init__(handler, segments, **kwargs)
 
     def setup_data(self, handler_kwargs: dict = None, **kwargs):
-
         super().setup_data(**kwargs)
 
         if handler_kwargs is not None:
@@ -288,7 +286,6 @@ class MTSDatasetH(DatasetH):
             daily_count = []  # store number of samples for each day
 
             for j in indices[i : i + batch_size]:
-
                 # normal sampling: self.batch_size > 0 => slices is a list => slices_subset is a slice
                 # daily sampling: self.batch_size < 0 => slices is a nested list => slices_subset is a list
                 slices_subset = slices[j]
@@ -297,7 +294,6 @@ class MTSDatasetH(DatasetH):
                 # each slices_subset contains a list of slices for multiple stocks
                 # NOTE: daily sampling is used in 1) eval mode, 2) train mode with self.batch_size < 0
                 if self.batch_size < 0:
-
                     # store daily index
                     idx = self._daily_index.index[j]  # daily_index.index is the index of the original data
                     daily_index.append(idx)
@@ -320,7 +316,6 @@ class MTSDatasetH(DatasetH):
                     slices_subset = [slices_subset]
 
                 for slc in slices_subset:
-
                     # legacy support for Alpha360 data by `input_size`
                     if self.input_size:
                         data.append(self._data[slc.stop - 1].reshape(self.input_size, -1).T)
diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py
index 638fbf0e8..8eed4814f 100644
--- a/qlib/contrib/data/highfreq_handler.py
+++ b/qlib/contrib/data/highfreq_handler.py
@@ -17,7 +17,6 @@ class HighFreqHandler(DataHandlerLP):
         fit_end_time=None,
         drop_raw=True,
     ):
-
         infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
         learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
 
@@ -318,7 +317,6 @@ class HighFreqOrderHandler(DataHandlerLP):
         inst_processors=None,
         drop_raw=True,
     ):
-
         infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
         learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
 
diff --git a/qlib/contrib/data/highfreq_processor.py b/qlib/contrib/data/highfreq_processor.py
index f7041e9f4..db2a6e39b 100644
--- a/qlib/contrib/data/highfreq_processor.py
+++ b/qlib/contrib/data/highfreq_processor.py
@@ -29,7 +29,6 @@ class HighFreqNorm(Processor):
         feature_save_dir: str,
         norm_groups: Dict[str, int],
     ):
-
         self.fit_start_time = fit_start_time
         self.fit_end_time = fit_end_time
         self.feature_save_dir = feature_save_dir
diff --git a/qlib/contrib/meta/data_selection/dataset.py b/qlib/contrib/meta/data_selection/dataset.py
index e3689d964..9349a12fe 100644
--- a/qlib/contrib/meta/data_selection/dataset.py
+++ b/qlib/contrib/meta/data_selection/dataset.py
@@ -49,6 +49,8 @@ class InternalData:
 
         # 1) prepare the prediction of proxy models
         perf_task_tpl = deepcopy(self.task_tpl)  # this task is supposed to contains no complicated objects
+        # The only thing we want to save is the prediction
+        perf_task_tpl["record"] = ["qlib.workflow.record_temp.SignalRecord"]
 
         trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
         # NOTE:
diff --git a/qlib/contrib/model/pytorch_adarnn.py b/qlib/contrib/model/pytorch_adarnn.py
index 4b0db7f4b..ca5e8ba86 100644
--- a/qlib/contrib/model/pytorch_adarnn.py
+++ b/qlib/contrib/model/pytorch_adarnn.py
@@ -246,7 +246,6 @@ class ADARNN(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid = dataset.prepare(
             ["train", "valid"],
             col_set=["feature", "label"],
@@ -318,7 +317,6 @@ class ADARNN(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py
index b0770e2bd..2fe7cce3b 100644
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -146,7 +146,6 @@ class ALSTM(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -155,7 +154,6 @@ class ALSTM(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -165,7 +163,6 @@ class ALSTM(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -181,7 +178,6 @@ class ALSTM(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -194,7 +190,6 @@ class ALSTM(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -217,7 +212,6 @@ class ALSTM(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -282,7 +276,6 @@ class ALSTM(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py
index 3ab8ed8ab..008d78940 100644
--- a/qlib/contrib/model/pytorch_alstm_ts.py
+++ b/qlib/contrib/model/pytorch_alstm_ts.py
@@ -156,7 +156,6 @@ class ALSTM(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -165,10 +164,9 @@ class ALSTM(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.ALSTM_model.train()
 
-        for (data, weight) in data_loader:
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
@@ -181,14 +179,12 @@ class ALSTM(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.ALSTM_model.eval()
 
         scores = []
         losses = []
 
-        for (data, weight) in data_loader:
-
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             # feature[torch.isnan(feature)] = 0
             label = data[:, -1, -1].to(self.device)
@@ -295,7 +291,6 @@ class ALSTM(Model):
         preds = []
 
         for data in test_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
 
             with torch.no_grad():
diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py
index 127408877..63ebd480a 100644
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -154,7 +154,6 @@ class GATs(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -175,7 +174,6 @@ class GATs(Model):
         return daily_index, daily_count
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
         self.GAT_model.train()
@@ -197,7 +195,6 @@ class GATs(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -230,7 +227,6 @@ class GATs(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py
index 1b75efe89..b1239f78e 100644
--- a/qlib/contrib/model/pytorch_gats_ts.py
+++ b/qlib/contrib/model/pytorch_gats_ts.py
@@ -32,7 +32,6 @@ class DailyBatchSampler(Sampler):
         self.daily_index[0] = 0
 
     def __iter__(self):
-
         for idx, count in zip(self.daily_index, self.daily_count):
             yield np.arange(idx, idx + count)
 
@@ -173,7 +172,6 @@ class GATs(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -194,11 +192,9 @@ class GATs(Model):
         return daily_index, daily_count
 
     def train_epoch(self, data_loader):
-
         self.GAT_model.train()
 
         for data in data_loader:
-
             data = data.squeeze()
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
@@ -212,14 +208,12 @@ class GATs(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.GAT_model.eval()
 
         scores = []
         losses = []
 
         for data in data_loader:
-
             data = data.squeeze()
             feature = data[:, :, 0:-1].to(self.device)
             # feature[torch.isnan(feature)] = 0
@@ -240,7 +234,6 @@ class GATs(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
         dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
         if dl_train.empty or dl_valid.empty:
@@ -329,7 +322,6 @@ class GATs(Model):
         preds = []
 
         for data in test_loader:
-
             data = data.squeeze()
             feature = data[:, :, 0:-1].to(self.device)
 
diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py
index 10998236b..2a476a657 100755
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -146,7 +146,6 @@ class GRU(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -155,7 +154,6 @@ class GRU(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -165,7 +163,6 @@ class GRU(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -181,7 +178,6 @@ class GRU(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -194,7 +190,6 @@ class GRU(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -217,7 +212,6 @@ class GRU(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -282,7 +276,6 @@ class GRU(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py
index b588392a2..2e5076ea6 100755
--- a/qlib/contrib/model/pytorch_gru_ts.py
+++ b/qlib/contrib/model/pytorch_gru_ts.py
@@ -154,7 +154,6 @@ class GRU(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -163,10 +162,9 @@ class GRU(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.GRU_model.train()
 
-        for (data, weight) in data_loader:
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
@@ -179,14 +177,12 @@ class GRU(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.GRU_model.eval()
 
         scores = []
         losses = []
 
-        for (data, weight) in data_loader:
-
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             # feature[torch.isnan(feature)] = 0
             label = data[:, -1, -1].to(self.device)
@@ -293,7 +289,6 @@ class GRU(Model):
         preds = []
 
         for data in test_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
 
             with torch.no_grad():
diff --git a/qlib/contrib/model/pytorch_hist.py b/qlib/contrib/model/pytorch_hist.py
index f7b565dc5..5c3cd66a3 100644
--- a/qlib/contrib/model/pytorch_hist.py
+++ b/qlib/contrib/model/pytorch_hist.py
@@ -160,7 +160,6 @@ class HIST(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric == "ic":
@@ -189,7 +188,6 @@ class HIST(Model):
         return daily_index, daily_count
 
     def train_epoch(self, x_train, y_train, stock_index):
-
         stock2concept_matrix = np.load(self.stock2concept)
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
@@ -214,7 +212,6 @@ class HIST(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y, stock_index):
-
         # prepare training data
         stock2concept_matrix = np.load(self.stock2concept)
         x_values = data_x.values
diff --git a/qlib/contrib/model/pytorch_igmtf.py b/qlib/contrib/model/pytorch_igmtf.py
index d38ef9ad4..46a25c00f 100644
--- a/qlib/contrib/model/pytorch_igmtf.py
+++ b/qlib/contrib/model/pytorch_igmtf.py
@@ -153,7 +153,6 @@ class IGMTF(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric == "ic":
@@ -201,7 +200,6 @@ class IGMTF(Model):
         return train_hidden, train_hidden_day
 
     def train_epoch(self, x_train, y_train, train_hidden, train_hidden_day):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -222,7 +220,6 @@ class IGMTF(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y, train_hidden, train_hidden_day):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -254,7 +251,6 @@ class IGMTF(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid = dataset.prepare(
             ["train", "valid"],
             col_set=["feature", "label"],
diff --git a/qlib/contrib/model/pytorch_krnn.py b/qlib/contrib/model/pytorch_krnn.py
new file mode 100644
index 000000000..7c252672d
--- /dev/null
+++ b/qlib/contrib/model/pytorch_krnn.py
@@ -0,0 +1,511 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import pandas as pd
+from typing import Text, Union
+import copy
+from ...utils import get_or_create_path
+from ...log import get_module_logger
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+
+########################################################################
+########################################################################
+########################################################################
+
+
+class CNNEncoderBase(nn.Module):
+    def __init__(self, input_dim, output_dim, kernel_size, device):
+        """Single-layer 1-D convolutional encoder over a flattened sequence.
+
+        Parameters
+        ----------
+        input_dim : int
+            Number of features per time step (Conv1d input channels)
+        output_dim : int
+            Number of Conv1d output channels
+        kernel_size : int
+            Width of the convolution kernel
+        device : torch.device or str
+            Device that `forward` moves its input to
+        """
+        super().__init__()
+        self.input_dim, self.output_dim = input_dim, output_dim
+        self.kernel_size, self.device = kernel_size, device
+
+        # Symmetric zero padding keeps the sequence length unchanged, but
+        # only for an odd kernel_size with stride 1 and dilation 1.
+        same_padding = (kernel_size - 1) // 2
+        self.conv = nn.Conv1d(input_dim, output_dim, kernel_size, padding=same_padding)
+
+    def forward(self, x):
+        """Convolve the input along its time axis.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Flattened input, [batch_size, seq_len * input_dim]
+
+        Returns
+        -------
+        torch.Tensor
+            Encoded sequence, [batch_size, conved_seq_len, output_dim]
+        """
+        batch_size = x.shape[0]
+        # [batch_size, seq_len * input_dim] -> [batch_size, seq_len, input_dim]
+        steps = x.view(batch_size, -1, self.input_dim)
+        # Conv1d wants channels first: [batch_size, input_dim, seq_len]
+        channels_first = steps.permute(0, 2, 1).to(self.device)
+        conved = self.conv(channels_first)  # [batch_size, output_dim, conved_seq_len]
+        return conved.permute(0, 2, 1)
+
+
+class KRNNEncoderBase(nn.Module):
+    def __init__(self, input_dim, output_dim, dup_num, rnn_layers, dropout, device):
+        """Build K parallel RNNs
+
+        Parameters
+        ----------
+        input_dim : int
+            The input dimension
+        output_dim : int
+            The output dimension
+        dup_num : int
+            The number of parallel RNNs
+        rnn_layers: int
+            The number of RNN layers
+        dropout : float
+            The dropout value passed to every GRU
+        device : torch.device or str
+            The device that `forward` moves its input to
+        """
+        super().__init__()
+
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.dup_num = dup_num
+        self.rnn_layers = rnn_layers
+        self.dropout = dropout
+        self.device = device
+
+        # dup_num independently initialised GRUs that all read the same input
+        self.rnn_modules = nn.ModuleList(
+            nn.GRU(input_dim, output_dim, num_layers=rnn_layers, dropout=dropout) for _ in range(dup_num)
+        )
+
+    def forward(self, x):
+        """Run every GRU on the same input and average their hidden states
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input data, [batch_size, seq_len, input_dim]
+
+        Returns
+        -------
+        torch.Tensor
+            Updated representations, [batch_size, seq_len, output_dim]
+        """
+        # the GRUs are not batch_first: [seq_len, batch_size, input_dim]
+        x = x.permute(1, 0, 2).to(self.device)
+
+        hids = []
+        for rnn in self.rnn_modules:
+            h, _ = rnn(x)  # [seq_len, batch_size, output_dim]
+            hids.append(h)
+
+        # stacking already gives [seq_len, batch_size, output_dim, dup_num], so
+        # no extra reshape is needed before averaging over the duplicates
+        hids = torch.stack(hids, dim=-1).mean(dim=-1)
+
+        # back to batch-first: [batch_size, seq_len, output_dim]
+        return hids.permute(1, 0, 2)
+
+
+class CNNKRNNEncoder(nn.Module):
+    def __init__(
+        self, cnn_input_dim, cnn_output_dim, cnn_kernel_size, rnn_output_dim, rnn_dup_num, rnn_layers, dropout, device
+    ):
+        """Chain a CNN encoder with a KRNN encoder
+
+        Parameters
+        ----------
+        cnn_input_dim : int
+            Input dimension of the CNN stage
+        cnn_output_dim : int
+            Output dimension of the CNN stage, which is also the KRNN input dimension
+        cnn_kernel_size : int
+            Kernel size of the CNN stage
+        rnn_output_dim : int
+            Output dimension of the KRNN stage
+        rnn_dup_num : int
+            Number of parallel RNNs in the KRNN stage
+        rnn_layers : int
+            Number of layers in each RNN
+        dropout, device
+            `dropout` goes to the KRNN stage, `device` to both stages
+        """
+        super().__init__()
+
+        # stage 1: convolution over the input sequence
+        self.cnn_encoder = CNNEncoderBase(
+            input_dim=cnn_input_dim, output_dim=cnn_output_dim, kernel_size=cnn_kernel_size, device=device
+        )
+        # stage 2: parallel RNNs over the convolved sequence
+        self.krnn_encoder = KRNNEncoderBase(
+            input_dim=cnn_output_dim,
+            output_dim=rnn_output_dim,
+            dup_num=rnn_dup_num,
+            rnn_layers=rnn_layers,
+            dropout=dropout,
+            device=device,
+        )
+
+    def forward(self, x):
+        """Encode `x` with the CNN stage, then the KRNN stage, and return the
+        KRNN output (torch.Tensor in, torch.Tensor out).
+        """
+        return self.krnn_encoder(self.cnn_encoder(x))
+
+
+class KRNNModel(nn.Module):
+    def __init__(self, fea_dim, cnn_dim, cnn_kernel_size, rnn_dim, rnn_dups, rnn_layers, dropout, device, **params):
+        """Build a KRNN model
+
+        Parameters
+        ----------
+        fea_dim : int
+            The feature dimension
+        cnn_dim : int
+            The hidden dimension of CNN
+        cnn_kernel_size : int
+            The size of convolutional kernels
+        rnn_dim : int
+            The hidden dimension of KRNN
+        rnn_dups : int
+            The number of parallel duplicates
+        rnn_layers: int
+            The number of RNN layers
+        dropout, device, **params
+            dropout/device go to the encoder; extra keyword arguments are ignored
+        """
+        super().__init__()
+
+        self.encoder = CNNKRNNEncoder(
+            cnn_input_dim=fea_dim,
+            cnn_output_dim=cnn_dim,
+            cnn_kernel_size=cnn_kernel_size,
+            rnn_output_dim=rnn_dim,
+            rnn_dup_num=rnn_dups,
+            rnn_layers=rnn_layers,
+            dropout=dropout,
+            device=device,
+        )
+        self.out_fc = nn.Linear(rnn_dim, 1)
+        self.device = device
+
+    def forward(self, x):
+        # x: [batch_size, seq_len * fea_dim]; encode: [batch_size, seq_len, rnn_dim]
+        encode = self.encoder(x)
+        # squeeze only the last axis so a batch of one still yields shape [1], not a scalar
+        return self.out_fc(encode[:, -1, :]).squeeze(-1).to(self.device)
+
+
+class KRNN(Model):
+    """KRNN Model
+
+    Parameters
+    ----------
+    d_feat : int
+        input dimension for each time step
+    metric: str
+        the evaluation metric used in early stop
+    optimizer : str
+        optimizer name
+    GPU : str
+        the GPU ID(s) used for training
+    """
+
+    def __init__(
+        self,
+        fea_dim=6,
+        cnn_dim=64,
+        cnn_kernel_size=3,
+        rnn_dim=64,
+        rnn_dups=3,
+        rnn_layers=2,
+        dropout=0,
+        n_epochs=200,
+        lr=0.001,
+        metric="",
+        batch_size=2000,
+        early_stop=20,
+        loss="mse",
+        optimizer="adam",
+        GPU=0,
+        seed=None,
+        **kwargs
+    ):
+        """Store the hyper-parameters, log them, then build the network and
+        its optimizer.
+        """
+        self.logger = get_module_logger("KRNN")
+        self.logger.info("KRNN pytorch version...")
+
+        # network shape
+        self.fea_dim = fea_dim
+        self.cnn_dim = cnn_dim
+        self.cnn_kernel_size = cnn_kernel_size
+        self.rnn_dim = rnn_dim
+        self.rnn_dups = rnn_dups
+        self.rnn_layers = rnn_layers
+        self.dropout = dropout
+
+        # training schedule
+        self.n_epochs = n_epochs
+        self.lr = lr
+        self.metric = metric
+        self.batch_size = batch_size
+        self.early_stop = early_stop
+        self.optimizer = optimizer.lower()
+        self.loss = loss
+        self.seed = seed
+
+        # the requested CUDA device is used only when CUDA is available and GPU >= 0
+        if torch.cuda.is_available() and GPU >= 0:
+            device_name = "cuda:%d" % (GPU)
+        else:
+            device_name = "cpu"
+        self.device = torch.device(device_name)
+
+        # (template, value) pairs; each pair becomes one line of the settings report
+        report = (
+            ("fea_dim : {}", fea_dim),
+            ("cnn_dim : {}", cnn_dim),
+            ("cnn_kernel_size : {}", cnn_kernel_size),
+            ("rnn_dim : {}", rnn_dim),
+            ("rnn_dups : {}", rnn_dups),
+            ("rnn_layers : {}", rnn_layers),
+            ("dropout : {}", dropout),
+            ("n_epochs : {}", n_epochs),
+            ("lr : {}", lr),
+            ("metric : {}", metric),
+            ("batch_size: {}", batch_size),
+            ("early_stop : {}", early_stop),
+            ("optimizer : {}", self.optimizer),
+            ("loss_type : {}", loss),
+            ("visible_GPU : {}", GPU),
+            ("use_GPU : {}", self.use_gpu),
+            ("seed : {}", seed),
+        )
+        self.logger.info(
+            "KRNN parameters setting:" + "".join("\n" + template.format(value) for template, value in report)
+        )
+
+        # seed numpy and torch before the network weights are created
+        if self.seed is not None:
+            np.random.seed(self.seed)
+            torch.manual_seed(self.seed)
+
+        self.krnn_model = KRNNModel(
+            fea_dim=self.fea_dim,
+            cnn_dim=self.cnn_dim,
+            cnn_kernel_size=self.cnn_kernel_size,
+            rnn_dim=self.rnn_dim,
+            rnn_dups=self.rnn_dups,
+            rnn_layers=self.rnn_layers,
+            dropout=self.dropout,
+            device=self.device,
+        )
+
+        # "adam" -> Adam, "gd" -> plain SGD; any other name is rejected
+        if self.optimizer == "adam":
+            optimizer_cls = optim.Adam
+        elif self.optimizer == "gd":
+            optimizer_cls = optim.SGD
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
+        self.train_optimizer = optimizer_cls(self.krnn_model.parameters(), lr=self.lr)
+
+        # nothing has been trained yet
+        self.fitted = False
+        self.krnn_model.to(self.device)
+
+    @property
+    def use_gpu(self):
+        return not (self.device == torch.device("cpu"))  # True for any non-CPU device
+
+    def mse(self, pred, label):
+        # mean of the element-wise squared error
+        return torch.mean((pred - label) ** 2)
+
+    def loss_fn(self, pred, label):
+        """Loss over the samples whose label is not NaN; only "mse" is supported."""
+        valid = torch.isnan(label).logical_not()
+        if self.loss != "mse":
+            raise ValueError("unknown loss `%s`" % self.loss)
+        # masked mean squared error
+        return self.mse(pred[valid], label[valid])
+
+    def metric_fn(self, pred, label):
+        """Score (higher is better): the negated loss over samples with a finite label."""
+        finite = torch.isfinite(label)
+        if self.metric not in ("", "loss"):
+            raise ValueError("unknown metric `%s`" % self.metric)
+        # negate so that a smaller loss gives a larger score
+        return -self.loss_fn(pred[finite], label[finite])
+
+    def get_daily_inter(self, df, shuffle=False):
+        """Return (start offsets, sizes) of the groups formed by index level 0 of `df`."""
+        sizes = df.groupby(level=0).size().values
+        starts = np.roll(np.cumsum(sizes), 1)  # running total shifted right by one
+        starts[0] = 0
+        if not shuffle:
+            return starts, sizes
+        # shuffle the (start, size) pairs together so they stay aligned
+        pairs = list(zip(starts, sizes))
+        np.random.shuffle(pairs)
+        return tuple(zip(*pairs))
+
+    def train_epoch(self, x_train, y_train):
+        """Run one optimisation pass over shuffled, full-size mini-batches."""
+        features = x_train.values
+        labels = np.squeeze(y_train.values)
+        self.krnn_model.train()
+
+        order = np.arange(len(features))
+        np.random.shuffle(order)
+
+        for start in range(0, len(order), self.batch_size):
+            stop = start + self.batch_size
+            if stop > len(order):
+                break  # a trailing partial batch is not trained on
+            rows = order[start:stop]
+            feature = torch.from_numpy(features[rows]).float().to(self.device)
+            label = torch.from_numpy(labels[rows]).float().to(self.device)
+            loss = self.loss_fn(self.krnn_model(feature), label)
+
+            self.train_optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(self.krnn_model.parameters(), 3.0)
+            self.train_optimizer.step()
+
+    def test_epoch(self, data_x, data_y):
+        """Return (mean loss, mean score) over the full-size batches of the given data.
+
+        A trailing batch smaller than `batch_size` is skipped.
+        """
+        x_values = data_x.values
+        y_values = np.squeeze(data_y.values)
+
+        self.krnn_model.eval()
+
+        scores = []
+        losses = []
+        indices = np.arange(len(x_values))
+
+        # evaluation never calls backward(), so gradient tracking is switched off
+        with torch.no_grad():
+            for i in range(len(indices))[:: self.batch_size]:
+                if len(indices) - i < self.batch_size:
+                    break
+
+                feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
+                label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)
+                pred = self.krnn_model(feature)
+                losses.append(self.loss_fn(pred, label).item())
+                scores.append(self.metric_fn(pred, label).item())
+
+        return np.mean(losses), np.mean(scores)
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        evals_result=dict(),
+        save_path=None,
+    ):
+        """Train with early stopping on the validation score, then restore and save the best weights."""
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"],  # the test segment is never used while fitting
+            col_set=["feature", "label"],
+            data_key=DataHandlerLP.DK_L,
+        )
+        if df_train.empty or df_valid.empty:
+            raise ValueError("Empty data from dataset, please check your dataset config.")
+
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]
+
+        save_path = get_or_create_path(save_path)
+        stop_steps = 0
+        best_score = -np.inf
+        best_epoch = 0
+        best_param = copy.deepcopy(self.krnn_model.state_dict())  # fallback if no epoch improves
+        evals_result["train"] = []
+        evals_result["valid"] = []
+
+        self.logger.info("training...")
+        self.fitted = True
+
+        for step in range(self.n_epochs):
+            self.logger.info("Epoch%d:", step)
+            self.logger.info("training...")
+            self.train_epoch(x_train, y_train)
+            self.logger.info("evaluating...")
+            _, train_score = self.test_epoch(x_train, y_train)
+            _, val_score = self.test_epoch(x_valid, y_valid)
+            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
+            evals_result["train"].append(train_score)
+            evals_result["valid"].append(val_score)
+
+            if val_score > best_score:
+                best_score = val_score
+                stop_steps = 0
+                best_epoch = step
+                best_param = copy.deepcopy(self.krnn_model.state_dict())
+            else:
+                stop_steps += 1
+                if stop_steps >= self.early_stop:
+                    self.logger.info("early stop")
+                    break
+
+        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
+        self.krnn_model.load_state_dict(best_param)
+        torch.save(best_param, save_path)
+
+        if self.use_gpu:
+            torch.cuda.empty_cache()
+
+    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
+        """Predict one score per row of the `segment` features, indexed like those rows."""
+        if not self.fitted:
+            raise ValueError("model is not fitted yet!")
+
+        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
+        index = x_test.index
+        self.krnn_model.eval()
+        x_values = x_test.values
+        sample_num = x_values.shape[0]
+        preds = []
+
+        for begin in range(0, sample_num, self.batch_size):
+            # the final batch may be shorter than batch_size
+            end = min(begin + self.batch_size, sample_num)
+            x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
+            with torch.no_grad():
+                pred = self.krnn_model(x_batch).cpu().numpy()
+            # a one-sample batch may come back 0-d; flatten so np.concatenate gets 1-d arrays
+            preds.append(pred.reshape(-1))
+
+        return pd.Series(np.concatenate(preds), index=index)
diff --git a/qlib/contrib/model/pytorch_localformer.py b/qlib/contrib/model/pytorch_localformer.py
index 6e7d91180..830bc59f0 100644
--- a/qlib/contrib/model/pytorch_localformer.py
+++ b/qlib/contrib/model/pytorch_localformer.py
@@ -46,7 +46,6 @@ class LocalformerModel(Model):
         seed=None,
         **kwargs
     ):
-
         # set hyper-parameters.
         self.d_model = d_model
         self.dropout = dropout
@@ -96,7 +95,6 @@ class LocalformerModel(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -105,7 +103,6 @@ class LocalformerModel(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -115,7 +112,6 @@ class LocalformerModel(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -131,7 +127,6 @@ class LocalformerModel(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -144,7 +139,6 @@ class LocalformerModel(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -167,7 +161,6 @@ class LocalformerModel(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -232,7 +225,6 @@ class LocalformerModel(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_localformer_ts.py b/qlib/contrib/model/pytorch_localformer_ts.py
index 18ef7f112..b05c2d311 100644
--- a/qlib/contrib/model/pytorch_localformer_ts.py
+++ b/qlib/contrib/model/pytorch_localformer_ts.py
@@ -44,7 +44,6 @@ class LocalformerModel(Model):
         seed=None,
         **kwargs
     ):
-
         # set hyper-parameters.
         self.d_model = d_model
         self.dropout = dropout
@@ -96,7 +95,6 @@ class LocalformerModel(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -105,7 +103,6 @@ class LocalformerModel(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.model.train()
 
         for data in data_loader:
@@ -121,14 +118,12 @@ class LocalformerModel(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.model.eval()
 
         scores = []
         losses = []
 
         for data in data_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
@@ -148,7 +143,6 @@ class LocalformerModel(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
         dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
         if dl_train.empty or dl_valid.empty:
diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py
index a68cf5eac..168be6ca5 100755
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -142,7 +142,6 @@ class LSTM(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -151,7 +150,6 @@ class LSTM(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -161,7 +159,6 @@ class LSTM(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -177,7 +174,6 @@ class LSTM(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -190,7 +186,6 @@ class LSTM(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -212,7 +207,6 @@ class LSTM(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py
index f1a3c55e8..8ecafc2d5 100755
--- a/qlib/contrib/model/pytorch_lstm_ts.py
+++ b/qlib/contrib/model/pytorch_lstm_ts.py
@@ -150,7 +150,6 @@ class LSTM(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -159,10 +158,9 @@ class LSTM(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.LSTM_model.train()
 
-        for (data, weight) in data_loader:
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
@@ -175,14 +173,12 @@ class LSTM(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.LSTM_model.eval()
 
         scores = []
         losses = []
 
-        for (data, weight) in data_loader:
-
+        for data, weight in data_loader:
             feature = data[:, :, 0:-1].to(self.device)
             # feature[torch.isnan(feature)] = 0
             label = data[:, -1, -1].to(self.device)
@@ -288,7 +284,6 @@ class LSTM(Model):
         preds = []
 
         for data in test_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
 
             with torch.no_grad():
diff --git a/qlib/contrib/model/pytorch_sandwich.py b/qlib/contrib/model/pytorch_sandwich.py
new file mode 100644
index 000000000..020c736fd
--- /dev/null
+++ b/qlib/contrib/model/pytorch_sandwich.py
@@ -0,0 +1,381 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import pandas as pd
+from typing import Text, Union
+import copy
+from ...utils import get_or_create_path
+from ...log import get_module_logger
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+from .pytorch_krnn import CNNKRNNEncoder
+
+
+class SandwichModel(nn.Module):
+    def __init__(
+        self,
+        fea_dim,
+        cnn_dim_1,
+        cnn_dim_2,
+        cnn_kernel_size,
+        rnn_dim_1,
+        rnn_dim_2,
+        rnn_dups,
+        rnn_layers,
+        dropout,
+        device,
+        **params
+    ):
+        """Build a Sandwich model
+
+        Parameters
+        ----------
+        fea_dim : int
+            The feature dimension
+        cnn_dim_1 : int
+            The hidden dimension of the first CNN
+        cnn_dim_2 : int
+            The hidden dimension of the second CNN
+        cnn_kernel_size : int
+            The size of convolutional kernels
+        rnn_dim_1 : int
+            The hidden dimension of the first KRNN
+        rnn_dim_2 : int
+            The hidden dimension of the second KRNN
+        rnn_dups : int
+            The number of parallel duplicates
+        rnn_layers: int
+            The number of RNN layers
+        """
+        super().__init__()
+
+        self.first_encoder = CNNKRNNEncoder(
+            cnn_input_dim=fea_dim,
+            cnn_output_dim=cnn_dim_1,
+            cnn_kernel_size=cnn_kernel_size,
+            rnn_output_dim=rnn_dim_1,
+            rnn_dup_num=rnn_dups,
+            rnn_layers=rnn_layers,
+            dropout=dropout,
+            device=device,
+        )
+
+        self.second_encoder = CNNKRNNEncoder(
+            cnn_input_dim=rnn_dim_1,
+            cnn_output_dim=cnn_dim_2,
+            cnn_kernel_size=cnn_kernel_size,
+            rnn_output_dim=rnn_dim_2,
+            rnn_dup_num=rnn_dups,
+            rnn_layers=rnn_layers,
+            dropout=dropout,
+            device=device,
+        )
+
+        self.out_fc = nn.Linear(rnn_dim_2, 1)
+        self.device = device
+
+    def forward(self, x):
+        # x: [batch_size, node_num, seq_len, input_dim]
+        encode = self.first_encoder(x)
+        encode = self.second_encoder(encode)
+        # squeeze only the trailing unit dim so a single-sample batch stays 1-D (a bare squeeze() yields a 0-d tensor)
+        out = self.out_fc(encode[:, -1, :]).squeeze(-1).to(self.device)
+        return out
+
+
+class Sandwich(Model):
+    """Sandwich Model
+
+    Parameters
+    ----------
+    fea_dim : int
+        input dimension for each time step
+    metric: str
+        the evaluation metric used in early stop
+    optimizer : str
+        optimizer name
+    GPU : int
+        the GPU ID used for training
+    """
+
+    def __init__(
+        self,
+        fea_dim=6,
+        cnn_dim_1=64,
+        cnn_dim_2=32,
+        cnn_kernel_size=3,
+        rnn_dim_1=16,
+        rnn_dim_2=8,
+        rnn_dups=3,
+        rnn_layers=2,
+        dropout=0,
+        n_epochs=200,
+        lr=0.001,
+        metric="",
+        batch_size=2000,
+        early_stop=20,
+        loss="mse",
+        optimizer="adam",
+        GPU=0,
+        seed=None,
+        **kwargs
+    ):
+        # Set logger.
+        self.logger = get_module_logger("Sandwich")
+        self.logger.info("Sandwich pytorch version...")
+
+        # set hyper-parameters.
+        self.fea_dim = fea_dim
+        self.cnn_dim_1 = cnn_dim_1
+        self.cnn_dim_2 = cnn_dim_2
+        self.cnn_kernel_size = cnn_kernel_size
+        self.rnn_dim_1 = rnn_dim_1
+        self.rnn_dim_2 = rnn_dim_2
+        self.rnn_dups = rnn_dups
+        self.rnn_layers = rnn_layers
+        self.dropout = dropout
+        self.n_epochs = n_epochs
+        self.lr = lr
+        self.metric = metric
+        self.batch_size = batch_size
+        self.early_stop = early_stop
+        self.optimizer = optimizer.lower()
+        self.loss = loss
+        self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
+        self.seed = seed
+
+        self.logger.info(
+            "Sandwich parameters setting:"
+            "\nfea_dim : {}"
+            "\ncnn_dim_1 : {}"
+            "\ncnn_dim_2 : {}"
+            "\ncnn_kernel_size : {}"
+            "\nrnn_dim_1 : {}"
+            "\nrnn_dim_2 : {}"
+            "\nrnn_dups : {}"
+            "\nrnn_layers : {}"
+            "\ndropout : {}"
+            "\nn_epochs : {}"
+            "\nlr : {}"
+            "\nmetric : {}"
+            "\nbatch_size: {}"
+            "\nearly_stop : {}"
+            "\noptimizer : {}"
+            "\nloss_type : {}"
+            "\nvisible_GPU : {}"
+            "\nuse_GPU : {}"
+            "\nseed : {}".format(
+                fea_dim,
+                cnn_dim_1,
+                cnn_dim_2,
+                cnn_kernel_size,
+                rnn_dim_1,
+                rnn_dim_2,
+                rnn_dups,
+                rnn_layers,
+                dropout,
+                n_epochs,
+                lr,
+                metric,
+                batch_size,
+                early_stop,
+                optimizer.lower(),
+                loss,
+                GPU,
+                self.use_gpu,
+                seed,
+            )
+        )
+
+        if self.seed is not None:
+            np.random.seed(self.seed)
+            torch.manual_seed(self.seed)
+
+        self.sandwich_model = SandwichModel(
+            fea_dim=self.fea_dim,
+            cnn_dim_1=self.cnn_dim_1,
+            cnn_dim_2=self.cnn_dim_2,
+            cnn_kernel_size=self.cnn_kernel_size,
+            rnn_dim_1=self.rnn_dim_1,
+            rnn_dim_2=self.rnn_dim_2,
+            rnn_dups=self.rnn_dups,
+            rnn_layers=self.rnn_layers,
+            dropout=self.dropout,
+            device=self.device,
+        )
+        if optimizer.lower() == "adam":
+            self.train_optimizer = optim.Adam(self.sandwich_model.parameters(), lr=self.lr)
+        elif optimizer.lower() == "gd":
+            self.train_optimizer = optim.SGD(self.sandwich_model.parameters(), lr=self.lr)
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
+
+        self.fitted = False
+        self.sandwich_model.to(self.device)
+
+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
+    def mse(self, pred, label):
+        loss = (pred - label) ** 2
+        return torch.mean(loss)
+
+    def loss_fn(self, pred, label):
+        mask = ~torch.isnan(label)
+
+        if self.loss == "mse":
+            return self.mse(pred[mask], label[mask])
+
+        raise ValueError("unknown loss `%s`" % self.loss)
+
+    def metric_fn(self, pred, label):
+        mask = torch.isfinite(label)
+
+        if self.metric in ("", "loss"):
+            return -self.loss_fn(pred[mask], label[mask])
+
+        raise ValueError("unknown metric `%s`" % self.metric)
+
+    def train_epoch(self, x_train, y_train):
+        x_train_values = x_train.values
+        y_train_values = np.squeeze(y_train.values)
+        self.sandwich_model.train()
+
+        indices = np.arange(len(x_train_values))
+        np.random.shuffle(indices)
+
+        for i in range(len(indices))[:: self.batch_size]:
+            if len(indices) - i < self.batch_size:
+                break
+
+            feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
+            label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
+
+            pred = self.sandwich_model(feature)
+            loss = self.loss_fn(pred, label)
+
+            self.train_optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(self.sandwich_model.parameters(), 3.0)
+            self.train_optimizer.step()
+
+    def test_epoch(self, data_x, data_y):
+        # prepare evaluation data
+        x_values = data_x.values
+        y_values = np.squeeze(data_y.values)
+
+        self.sandwich_model.eval()
+
+        scores = []
+        losses = []
+
+        indices = np.arange(len(x_values))
+
+        for i in range(len(indices))[:: self.batch_size]:
+            if len(indices) - i < self.batch_size:
+                break
+
+            feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
+            label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)
+
+            pred = self.sandwich_model(feature)
+            loss = self.loss_fn(pred, label)
+            losses.append(loss.item())
+
+            score = self.metric_fn(pred, label)
+            scores.append(score.item())
+
+        return np.mean(losses), np.mean(scores)
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        evals_result=dict(),
+        save_path=None,
+    ):
+        df_train, df_valid, df_test = dataset.prepare(
+            ["train", "valid", "test"],
+            col_set=["feature", "label"],
+            data_key=DataHandlerLP.DK_L,
+        )
+        if df_train.empty or df_valid.empty:
+            raise ValueError("Empty data from dataset, please check your dataset config.")
+
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]
+
+        save_path = get_or_create_path(save_path)
+        stop_steps = 0
+        train_loss = 0
+        best_score = -np.inf
+        best_epoch = 0
+        evals_result["train"] = []
+        evals_result["valid"] = []
+
+        # train
+        self.logger.info("training...")
+        self.fitted = True
+
+        for step in range(self.n_epochs):
+            self.logger.info("Epoch%d:", step)
+            self.logger.info("training...")
+            self.train_epoch(x_train, y_train)
+            self.logger.info("evaluating...")
+            train_loss, train_score = self.test_epoch(x_train, y_train)
+            val_loss, val_score = self.test_epoch(x_valid, y_valid)
+            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
+            evals_result["train"].append(train_score)
+            evals_result["valid"].append(val_score)
+
+            if val_score > best_score:
+                best_score = val_score
+                stop_steps = 0
+                best_epoch = step
+                best_param = copy.deepcopy(self.sandwich_model.state_dict())
+            else:
+                stop_steps += 1
+                if stop_steps >= self.early_stop:
+                    self.logger.info("early stop")
+                    break
+
+        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
+        self.sandwich_model.load_state_dict(best_param)
+        torch.save(best_param, save_path)
+
+        if self.use_gpu:
+            torch.cuda.empty_cache()
+
+    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
+        if not self.fitted:
+            raise ValueError("model is not fitted yet!")
+
+        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
+        index = x_test.index
+        self.sandwich_model.eval()
+        x_values = x_test.values
+        sample_num = x_values.shape[0]
+        preds = []
+
+        for begin in range(sample_num)[:: self.batch_size]:
+            if sample_num - begin < self.batch_size:
+                end = sample_num
+            else:
+                end = begin + self.batch_size
+            x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
+            with torch.no_grad():
+                pred = self.sandwich_model(x_batch).detach().cpu().numpy()
+            preds.append(pred)
+
+        return pd.Series(np.concatenate(preds), index=index)
diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py
index 29bae94a3..e79f475d6 100644
--- a/qlib/contrib/model/pytorch_sfm.py
+++ b/qlib/contrib/model/pytorch_sfm.py
@@ -306,7 +306,6 @@ class SFM(Model):
         return self.device != torch.device("cpu")
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -319,7 +318,6 @@ class SFM(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -336,7 +334,6 @@ class SFM(Model):
         return np.mean(losses), np.mean(scores)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -346,7 +343,6 @@ class SFM(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -367,7 +363,6 @@ class SFM(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid = dataset.prepare(
             ["train", "valid"],
             col_set=["feature", "label"],
@@ -431,7 +426,6 @@ class SFM(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py
index adc7354fe..3c698edad 100644
--- a/qlib/contrib/model/pytorch_tabnet.py
+++ b/qlib/contrib/model/pytorch_tabnet.py
@@ -256,7 +256,6 @@ class TabnetModel(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
             feature = x_values[indices[i : i + self.batch_size]].float().to(self.device)
@@ -283,7 +282,6 @@ class TabnetModel(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -308,7 +306,6 @@ class TabnetModel(Model):
         self.tabnet_decoder.train()
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -339,7 +336,6 @@ class TabnetModel(Model):
         losses = []
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
diff --git a/qlib/contrib/model/pytorch_tcn.py b/qlib/contrib/model/pytorch_tcn.py
index 2af7a04ea..38e289342 100755
--- a/qlib/contrib/model/pytorch_tcn.py
+++ b/qlib/contrib/model/pytorch_tcn.py
@@ -154,7 +154,6 @@ class TCN(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -163,7 +162,6 @@ class TCN(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -173,7 +171,6 @@ class TCN(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -200,7 +197,6 @@ class TCN(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -223,7 +219,6 @@ class TCN(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -286,7 +281,6 @@ class TCN(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_tcn_ts.py b/qlib/contrib/model/pytorch_tcn_ts.py
index 4972a3065..605da62c4 100755
--- a/qlib/contrib/model/pytorch_tcn_ts.py
+++ b/qlib/contrib/model/pytorch_tcn_ts.py
@@ -155,7 +155,6 @@ class TCN(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -164,11 +163,11 @@ class TCN(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.TCN_model.train()
 
         for data in data_loader:
-            feature = data[:, :, 0:-1].to(self.device)
+            data = torch.transpose(data, 1, 2)
+            feature = data[:, 0:-1, :].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
             pred = self.TCN_model(feature.float())
@@ -180,15 +179,14 @@ class TCN(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.TCN_model.eval()
 
         scores = []
         losses = []
 
         for data in data_loader:
-
-            feature = data[:, :, 0:-1].to(self.device)
+            data = torch.transpose(data, 1, 2)
+            feature = data[:, 0:-1, :].to(self.device)
             # feature[torch.isnan(feature)] = 0
             label = data[:, -1, -1].to(self.device)
 
@@ -276,7 +274,6 @@ class TCN(Model):
         preds = []
 
         for data in test_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
 
             with torch.no_grad():
diff --git a/qlib/contrib/model/pytorch_tcts.py b/qlib/contrib/model/pytorch_tcts.py
index b46835cb6..651bd03d2 100644
--- a/qlib/contrib/model/pytorch_tcts.py
+++ b/qlib/contrib/model/pytorch_tcts.py
@@ -119,7 +119,6 @@ class TCTS(Model):
         )
 
     def loss_fn(self, pred, label, weight):
-
         if self.mode == "hard":
             loc = torch.argmax(weight, 1)
             loss = (pred - label[np.arange(weight.shape[0]), loc]) ** 2
@@ -157,7 +156,6 @@ class TCTS(Model):
 
         for i in range(self.steps):
             for i in range(len(indices))[:: self.batch_size]:
-
                 if len(indices) - i < self.batch_size:
                     break
 
@@ -191,7 +189,6 @@ class TCTS(Model):
 
         # fix forecasting model and valid weight model
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -212,7 +209,6 @@ class TCTS(Model):
             self.weight_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -224,7 +220,6 @@ class TCTS(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -282,7 +277,6 @@ class TCTS(Model):
         verbose=True,
         save_path=None,
     ):
-
         self.fore_model = GRUModel(
             d_feat=self.d_feat,
             hidden_size=self.hidden_size,
@@ -366,7 +360,6 @@ class TCTS(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_tra.py b/qlib/contrib/model/pytorch_tra.py
index 46d362c68..964febf11 100644
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -84,7 +84,6 @@ class TRAModel(Model):
         transport_method="none",
         memory_mode="sample",
     ):
-
         self.logger = get_module_logger("TRA")
 
         assert memory_mode in ["sample", "daily"], "invalid memory mode"
@@ -136,7 +135,6 @@ class TRAModel(Model):
         self._init_model()
 
     def _init_model(self):
-
         self.logger.info("init TRAModel...")
 
         self.model = eval(self.model_type)(**self.model_config).to(device)
@@ -176,7 +174,6 @@ class TRAModel(Model):
         self.global_step = -1
 
     def train_epoch(self, epoch, data_set, is_pretrain=False):
-
         self.model.train()
         self.tra.train()
         data_set.train()
@@ -274,7 +271,6 @@ class TRAModel(Model):
         return total_loss
 
     def test_epoch(self, epoch, data_set, return_pred=False, prefix="test", is_pretrain=False):
-
         self.model.eval()
         self.tra.eval()
         data_set.eval()
@@ -360,7 +356,6 @@ class TRAModel(Model):
         return metrics, preds, probs, P_all
 
     def _fit(self, train_set, valid_set, test_set, evals_result, is_pretrain=True):
-
         best_score = -1
         best_epoch = 0
         stop_rounds = 0
@@ -419,7 +414,6 @@ class TRAModel(Model):
         return best_score
 
     def fit(self, dataset, evals_result=dict()):
-
         assert isinstance(dataset, MTSDatasetH), "TRAModel only supports `qlib.contrib.data.dataset.MTSDatasetH`"
 
         train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
@@ -503,7 +497,6 @@ class TRAModel(Model):
                 json.dump(info, f)
 
     def predict(self, dataset, segment="test"):
-
         assert isinstance(dataset, MTSDatasetH), "TRAModel only supports `qlib.contrib.data.dataset.MTSDatasetH`"
 
         if not self.fitted:
@@ -571,7 +564,6 @@ class RNN(nn.Module):
             self.output_size = hidden_size
 
     def forward(self, x):
-
         if self.input_proj is not None:
             x = self.input_proj(x)
 
@@ -647,7 +639,6 @@ class Transformer(nn.Module):
         self.output_size = hidden_size
 
     def forward(self, x):
-
         x = x.permute(1, 0, 2).contiguous()  # the first dim need to be time
         x = self.pe(x)
 
@@ -713,7 +704,6 @@ class TRA(nn.Module):
             child.reset_parameters()
 
     def forward(self, hidden, hist_loss):
-
         preds = self.predictors(hidden)
 
         if self.num_states == 1:  # no need for router when having only one prediction
diff --git a/qlib/contrib/model/pytorch_transformer.py b/qlib/contrib/model/pytorch_transformer.py
index 66e5b2c4e..f4b7a06eb 100644
--- a/qlib/contrib/model/pytorch_transformer.py
+++ b/qlib/contrib/model/pytorch_transformer.py
@@ -45,7 +45,6 @@ class TransformerModel(Model):
         seed=None,
         **kwargs
     ):
-
         # set hyper-parameters.
         self.d_model = d_model
         self.dropout = dropout
@@ -95,7 +94,6 @@ class TransformerModel(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -104,7 +102,6 @@ class TransformerModel(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, x_train, y_train):
-
         x_train_values = x_train.values
         y_train_values = np.squeeze(y_train.values)
 
@@ -114,7 +111,6 @@ class TransformerModel(Model):
         np.random.shuffle(indices)
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -130,7 +126,6 @@ class TransformerModel(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_x, data_y):
-
         # prepare training data
         x_values = data_x.values
         y_values = np.squeeze(data_y.values)
@@ -143,7 +138,6 @@ class TransformerModel(Model):
         indices = np.arange(len(x_values))
 
         for i in range(len(indices))[:: self.batch_size]:
-
             if len(indices) - i < self.batch_size:
                 break
 
@@ -166,7 +160,6 @@ class TransformerModel(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -231,7 +224,6 @@ class TransformerModel(Model):
         preds = []
 
         for begin in range(sample_num)[:: self.batch_size]:
-
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/model/pytorch_transformer_ts.py b/qlib/contrib/model/pytorch_transformer_ts.py
index 6cffded9c..84b093805 100644
--- a/qlib/contrib/model/pytorch_transformer_ts.py
+++ b/qlib/contrib/model/pytorch_transformer_ts.py
@@ -43,7 +43,6 @@ class TransformerModel(Model):
         seed=None,
         **kwargs
     ):
-
         # set hyper-parameters.
         self.d_model = d_model
         self.dropout = dropout
@@ -93,7 +92,6 @@ class TransformerModel(Model):
         raise ValueError("unknown loss `%s`" % self.loss)
 
     def metric_fn(self, pred, label):
-
         mask = torch.isfinite(label)
 
         if self.metric in ("", "loss"):
@@ -102,7 +100,6 @@ class TransformerModel(Model):
         raise ValueError("unknown metric `%s`" % self.metric)
 
     def train_epoch(self, data_loader):
-
         self.model.train()
 
         for data in data_loader:
@@ -118,14 +115,12 @@ class TransformerModel(Model):
             self.train_optimizer.step()
 
     def test_epoch(self, data_loader):
-
         self.model.eval()
 
         scores = []
         losses = []
 
         for data in data_loader:
-
             feature = data[:, :, 0:-1].to(self.device)
             label = data[:, -1, -1].to(self.device)
 
@@ -145,7 +140,6 @@ class TransformerModel(Model):
         evals_result=dict(),
         save_path=None,
     ):
-
         dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
         dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
 
diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py
index d38655ebd..67bedafa8 100755
--- a/qlib/contrib/model/xgboost.py
+++ b/qlib/contrib/model/xgboost.py
@@ -30,7 +30,6 @@ class XGBModel(Model, FeatureInt):
         reweighter=None,
         **kwargs
     ):
-
         df_train, df_valid = dataset.prepare(
             ["train", "valid"],
             col_set=["feature", "label"],
diff --git a/qlib/contrib/report/data/ana.py b/qlib/contrib/report/data/ana.py
index 782a92d5a..567ef311d 100644
--- a/qlib/contrib/report/data/ana.py
+++ b/qlib/contrib/report/data/ana.py
@@ -30,7 +30,6 @@ class CombFeaAna(FeaAnalyser):
         """The statistics of features are finished in the underlying analysers"""
 
     def plot_all(self, *args, **kwargs):
-
         ax_gen = iter(sub_fig_generator(row_n=len(self._fea_ana_l), *args, **kwargs))
 
         for col in self._dataset:
diff --git a/qlib/contrib/report/data/base.py b/qlib/contrib/report/data/base.py
index 1e7e092af..a91eda48e 100644
--- a/qlib/contrib/report/data/base.py
+++ b/qlib/contrib/report/data/base.py
@@ -28,7 +28,6 @@ class FeaAnalyser:
         return False
 
     def plot_all(self, *args, **kwargs):
-
         ax_gen = iter(sub_fig_generator(*args, **kwargs))
         for col in self._dataset:
             if not self.skip(col):
diff --git a/qlib/contrib/report/graph.py b/qlib/contrib/report/graph.py
index c5f932978..f9cf517ea 100644
--- a/qlib/contrib/report/graph.py
+++ b/qlib/contrib/report/graph.py
@@ -15,7 +15,6 @@ from plotly.figure_factory import create_distplot
 
 
 class BaseGraph:
-
     _name = None
 
     def __init__(
diff --git a/qlib/contrib/rolling/__init__.py b/qlib/contrib/rolling/__init__.py
new file mode 100644
index 000000000..b940486fd
--- /dev/null
+++ b/qlib/contrib/rolling/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""
+The difference between this module and the scripts in examples/benchmarks/benchmarks_dynamic
+- This module only focuses on providing a general rolling implementation.
+  Anything specific to a benchmark is placed in examples/benchmarks/benchmarks_dynamic
+"""
diff --git a/qlib/contrib/rolling/__main__.py b/qlib/contrib/rolling/__main__.py
new file mode 100644
index 000000000..461c0e777
--- /dev/null
+++ b/qlib/contrib/rolling/__main__.py
@@ -0,0 +1,16 @@
+import fire
+from qlib import auto_init
+from qlib.contrib.rolling.base import Rolling
+from qlib.utils.mod import find_all_classes
+
+if __name__ == "__main__":
+    sub_commands = {}
+    for cls in find_all_classes("qlib.contrib.rolling", Rolling):
+        sub_commands[cls.__module__.split(".")[-1]] = cls
+    # The sub_commands will be like
+    # {'base': <class 'qlib.contrib.rolling.base.Rolling'>, ...}
+    # So you can run it with a command like the one below
+    # - `python -m qlib.contrib.rolling base --conf_path <path to the yaml> run`
+    # - `base` can be replaced with other module names
+    auto_init()
+    fire.Fire(sub_commands)
diff --git a/qlib/contrib/rolling/base.py b/qlib/contrib/rolling/base.py
new file mode 100644
index 000000000..d179efb38
--- /dev/null
+++ b/qlib/contrib/rolling/base.py
@@ -0,0 +1,246 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from copy import deepcopy
+from pathlib import Path
+from typing import List, Optional, Union
+
+import fire
+import pandas as pd
+import yaml
+
+from qlib import auto_init
+from qlib.log import get_module_logger
+from qlib.model.ens.ensemble import RollingEnsemble
+from qlib.model.trainer import TrainerR
+from qlib.utils import get_cls_kwargs, init_instance_by_config
+from qlib.utils.data import update_config
+from qlib.workflow import R
+from qlib.workflow.record_temp import SignalRecord
+from qlib.workflow.task.collect import RecorderCollector
+from qlib.workflow.task.gen import RollingGen, task_generator
+from qlib.workflow.task.utils import replace_task_handler_with_cache
+
+
+class Rolling:
+    """
+    The motivation of Rolling Module
+    - It only focuses on turning a specific task into rolling tasks **offline**
+    - To make the implementation easier, the following factors are ignored.
+        - The tasks are dependent (e.g. time series).
+
+    Related modules and difference from me:
+    - MetaController: It is learning how to handle a task (e.g. learning to learn).
+        - But rolling is about how to split a single task into tasks in time series and run them.
+    - OnlineStrategy: It is focusing on serving a model, the model can be updated time dependently in time.
+        - Rolling is much simpler and is only for testing rolling models offline. It does not want to share the interface with OnlineStrategy.
+
+    The code about rolling is shared in `task_generator` & `RollingGen` level between me and the above modules
+    But it is for different purpose, so other parts are not shared.
+
+
+    .. code-block:: shell
+
+        # here is a typical use case of the module.
+        python -m qlib.contrib.rolling.base --conf_path <path to the yaml> run
+
+    **NOTE**
+    Before running the example, please clean your previous results with the following command
+    - `rm -r mlruns`
+    - Because it is very hard to permanently delete an experiment (it will be moved into .trash and raise an error when creating an experiment with the same name).
+
+    """
+
+    def __init__(
+        self,
+        conf_path: Union[str, Path],
+        exp_name: Optional[str] = None,
+        horizon: Optional[int] = 20,
+        step: int = 20,
+        h_path: Optional[str] = None,
+        train_start: Optional[str] = None,
+        test_end: Optional[str] = None,
+        task_ext_conf: Optional[dict] = None,
+        rolling_exp: Optional[str] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        conf_path : Union[str, Path]
+            Path to the config for rolling.
+        exp_name : Optional[str]
+            The exp name of the outputs (Output is a record which contains the concatenated predictions of rolling records).
+        horizon : Optional[int]
+            The horizon of the prediction target; it overrides the horizon in the config file. None is not supported yet.
+        step : int
+            The rolling step; it is passed to `RollingGen` when the rolling tasks are generated.
+        h_path : Optional[str]
+            The dumped data handler; it may come from another data source and overrides the data handler in the config.
+        test_end : Optional[str]
+            the test end for the data. It is typically used together with the handler
+            You can do the same thing with task_ext_conf in a more complicated way
+        train_start : Optional[str]
+            the train start for the data.  It is typically used together with the handler.
+            You can do the same thing with task_ext_conf in a more complicated way
+        task_ext_conf : Optional[dict]
+            some option to update the task config.
+        rolling_exp : Optional[str]
+            The name for the experiments for rolling.
+            It will contain a lot of records in an experiment. Each record corresponds to a specific rolling.
+            Please note that it is different from the final experiments
+        """
+        self.logger = get_module_logger("Rolling")
+        self.conf_path = Path(conf_path)
+        self.exp_name = exp_name
+        self._rid = None  # the final combined recorder id in `exp_name`
+
+        self.step = step
+        assert horizon is not None, "Current version does not support extracting horizon from the underlying dataset"
+        self.horizon = horizon
+        if rolling_exp is None:
+            datetime_suffix = pd.Timestamp.now().strftime("%Y%m%d%H%M%S")
+            self.rolling_exp = f"rolling_models_{datetime_suffix}"
+        else:
+            self.rolling_exp = rolling_exp
+            self.logger.warning(
+                "Using user specified name for rolling models. So the experiment names may be duplicated. "
+                "Please manually remove your experiment for rolling model with command like `rm -r mlruns`."
+                " Otherwise it will prevent the creation of experiments with the same name"
+            )
+        self.train_start = train_start
+        self.test_end = test_end
+        self.task_ext_conf = task_ext_conf
+        self.h_path = h_path
+
+        # FIXME:
+        # - the qlib_init section will be ignored by me.
+        # - So we have to design a priority mechanism to solve this issue.
+
+    def _raw_conf(self) -> dict:
+        with self.conf_path.open("r") as f:
+            return yaml.safe_load(f)
+
+    def _replace_hanler_with_cache(self, task: dict):
+        """
+        The data processing part of the original rolling is slow, so the handler of `task` is replaced with a dumped
+        one: `file://<h_path>` when `h_path` is given, otherwise via `replace_task_handler_with_cache` (config's dir).
+        """
+        if self.h_path is not None:
+            h_path = Path(self.h_path)
+            task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
+        else:
+            task = replace_task_handler_with_cache(task, self.conf_path.parent)
+        return task
+
+    def _update_start_end_time(self, task: dict):
+        if self.train_start is not None:
+            seg = task["dataset"]["kwargs"]["segments"]["train"]
+            task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
+
+        if self.test_end is not None:
+            seg = task["dataset"]["kwargs"]["segments"]["test"]
+            task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
+        return task
+
+    def basic_task(self, enable_handler_cache: Optional[bool] = True):
+        """
+        The basic task may not be exactly the same as the config from `conf_path` given to __init__ because
+        - some parameters could be overridden by the parameters of __init__
+        - users could implement a subclass to change it for higher performance
+        """
+        task: dict = self._raw_conf()["task"]
+        task = deepcopy(task)
+
+        # modify dataset horizon
+        # NOTE:
+        # It assumes that the label can be modified in the handler's kwargs
+        # But this is not always valid. It is only valid in the predefined dataset `Alpha158` & `Alpha360`
+        if self.horizon is None:
+            # TODO:
+            # - get horizon automatically from the expression!!!!
+            raise NotImplementedError(f"This type of input is not supported")
+        else:
+            self.logger.info("The prediction horizon is overrided")
+            task["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
+                "Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
+            ]
+
+        if enable_handler_cache:
+            task = self._replace_hanler_with_cache(task)
+        task = self._update_start_end_time(task)
+
+        if self.task_ext_conf is not None:
+            task = update_config(task, self.task_ext_conf)
+        self.logger.info(task)
+        return task
+
+    def get_task_list(self) -> List[dict]:
+        """Return the rolling tasks generated from `basic_task`; each task only keeps a SignalRecord as its record."""
+        task = self.basic_task()
+        task_l = task_generator(
+            task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
+        )  # the last days (`horizon + 1`, given as trunc_days) should be truncated to avoid information leakage
+        for t in task_l:
+            # when rolling the tasks, no further analysis is needed.
+            # the analysis is postponed to the final ensemble.
+            t["record"] = ["qlib.workflow.record_temp.SignalRecord"]
+        return task_l
+
+    def _train_rolling_tasks(self):
+        task_l = self.get_task_list()
+        self.logger.info("Deleting previous Rolling results")
+        try:
+            # TODO: mlflow does not support permanently delete experiment
+            # it will  be moved to .trash and prevents creating the experiments with the same name
+            R.delete_exp(experiment_name=self.rolling_exp)  # We should remove the rolling experiments.
+        except ValueError:
+            self.logger.info("No previous rolling results")
+        trainer = TrainerR(experiment_name=self.rolling_exp)
+        trainer(task_l)
+
+    def _ens_rolling(self):
+        rc = RecorderCollector(
+            experiment=self.rolling_exp,
+            artifacts_key=["pred", "label"],
+            process_list=[RollingEnsemble()],
+            # rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
+            artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
+        )
+        res = rc()
+        with R.start(experiment_name=self.exp_name):
+            R.log_params(exp_name=self.rolling_exp)
+            R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
+            self._rid = R.get_recorder().id
+
+    def _update_rolling_rec(self):
+        """
+        Evaluate the combined rolling results
+        """
+        rec = R.get_recorder(experiment_name=self.exp_name, recorder_id=self._rid)
+        # Follow the original analyser
+        records = self._raw_conf()["task"].get("record", [])
+        if isinstance(records, dict):  # prevent only one dict
+            records = [records]
+        for record in records:
+            if issubclass(get_cls_kwargs(record)[0], SignalRecord):
+                # skip the signal record.
+                continue
+            r = init_instance_by_config(
+                record,
+                recorder=rec,
+                default_module="qlib.workflow.record_temp",
+            )
+            r.generate()
+        print(f"Your evaluation results can be found in the experiment named `{self.exp_name}`.")
+
+    def run(self):
+        # the results will be  save in mlruns.
+        # 1) each rolling task is saved in rolling_models
+        self._train_rolling_tasks()
+        # 2) combined rolling tasks and evaluation results are saved in rolling
+        self._ens_rolling()
+        self._update_rolling_rec()
+
+
+if __name__ == "__main__":
+    auto_init()
+    fire.Fire(Rolling)
diff --git a/qlib/contrib/rolling/ddgda.py b/qlib/contrib/rolling/ddgda.py
new file mode 100644
index 000000000..25fb4c36e
--- /dev/null
+++ b/qlib/contrib/rolling/ddgda.py
@@ -0,0 +1,343 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from pathlib import Path
+import pickle
+from typing import Optional, Union
+
+import pandas as pd
+import yaml
+
+from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
+from qlib.contrib.meta.data_selection.model import MetaModelDS
+from qlib.data.dataset.handler import DataHandlerLP
+from qlib.model.meta.task import MetaTask
+from qlib.model.trainer import TrainerR
+from qlib.typehint import Literal
+from qlib.utils import init_instance_by_config
+from qlib.workflow import R
+from qlib.workflow.task.utils import replace_task_handler_with_cache
+
+from .base import Rolling
+
+# LGBM is designed for feature importance & similarity
+LGBM_MODEL = """
+class: LGBModel
+module_path: qlib.contrib.model.gbdt
+kwargs:
+    loss: mse
+    colsample_bytree: 0.8879
+    learning_rate: 0.2
+    subsample: 0.8789
+    lambda_l1: 205.6999
+    lambda_l2: 580.9768
+    max_depth: 8
+    num_leaves: 210
+    num_threads: 20
+"""
+# convert the yaml to dict (safe_load suffices: the template only holds plain scalars and mappings)
+LGBM_MODEL = yaml.safe_load(LGBM_MODEL)
+
+LINEAR_MODEL = """
+class: LinearModel
+module_path: qlib.contrib.model.linear
+kwargs:
+    estimator: ridge
+    alpha: 0.05
+"""
+LINEAR_MODEL = yaml.safe_load(LINEAR_MODEL)  # parse the yaml template into a dict
+
+PROC_ARGS = """
+infer_processors:
+    - class: RobustZScoreNorm
+      kwargs:
+          fields_group: feature
+          clip_outlier: true
+    - class: Fillna
+      kwargs:
+          fields_group: feature
+learn_processors:
+    - class: DropnaLabel
+    - class: CSRankNorm
+      kwargs:
+          fields_group: label
+"""
+PROC_ARGS = yaml.safe_load(PROC_ARGS)  # parse the yaml template into a dict of processor configs
+
+UTIL_MODEL_TYPE = Literal["linear", "gbdt"]
+
+
+class DDGDA(Rolling):
+    """
+    It is a rolling based on DDG-DA
+
+    **NOTE**
+    before running the example, please clean your previous results with following command
+    - `rm -r mlruns`
+    """
+
+    def __init__(
+        self,
+        sim_task_model: UTIL_MODEL_TYPE = "gbdt",
+        meta_1st_train_end: Optional[str] = None,
+        alpha: float = 0.01,
+        working_dir: Optional[Union[str, Path]] = None,
+        **kwargs,
+    ):
+        """
+        Parameters
+        ----------
+        sim_task_model: Literal["linear", "gbdt"] = "gbdt",
+            The model for calculating similarity between data.
+        meta_1st_train_end: Optional[str]
+            the datetime of training end of the first meta_task
+        alpha: float
+            The L2 regularization for ridge; only passed to MetaModelDS (not to sim_task_model currently).
+        working_dir: Optional[Union[str, Path]]
+            Directory for intermediate files (default: dir of `conf_path`); made absolute so re-joining it is a no-op.
+        **kwargs : forwarded to `Rolling.__init__` (e.g. `conf_path`, `horizon`, `step`).
+        """
+        # NOTE: the horizon must match the meaning in the base task template
+        self.meta_exp_name = "DDG-DA"
+        self.sim_task_model: UTIL_MODEL_TYPE = sim_task_model  # The model to capture the distribution of data.
+        self.alpha = alpha
+        self.meta_1st_train_end = meta_1st_train_end
+        super().__init__(**kwargs)
+        self.working_dir = (self.conf_path.parent if working_dir is None else Path(working_dir)).absolute()
+        self.proxy_hd = self.working_dir / "handler_proxy.pkl"
+
+    def _adjust_task(self, task: dict, astype: UTIL_MODEL_TYPE):
+        """
+        Adapt `task` (in place) to a utility model; some tasks are used for special purposes, for example:
+        - GBDT for calculating feature importance
+        - Linear or GBDT for calculating similarity
+        - Dataset (well processed) aligned to Linear for meta learning
+        Returns the same `task` object; raises ValueError when `astype` is neither "gbdt" nor "linear".
+        """
+        # NOTE: here is just for aligning with previous implementation
+        # It is not necessary for the current implementation
+        handler = task["dataset"].setdefault("kwargs", {}).setdefault("handler", {})
+        if astype == "gbdt":
+            # copy the template so that in-place edits of the task's model kwargs cannot alter the module constant
+            task["model"] = {**LGBM_MODEL, "kwargs": dict(LGBM_MODEL["kwargs"])}
+            if isinstance(handler, dict):
+                for k in ["infer_processors", "learn_processors"]:
+                    handler.setdefault("kwargs", {}).pop(k, None)
+        elif astype == "linear":
+            task["model"] = {**LINEAR_MODEL, "kwargs": dict(LINEAR_MODEL["kwargs"])}
+            handler.setdefault("kwargs", {}).update(PROC_ARGS)  # setdefault: the handler may lack `kwargs`
+        else:
+            raise ValueError(f"astype not supported: {astype}")
+        return task
+
+    def _get_feature_importance(self):
+        # this must be lightGBM, because it needs to get the feature importance
+        task = self.basic_task(enable_handler_cache=False)
+        task = self._adjust_task(task, astype="gbdt")
+        task = replace_task_handler_with_cache(task, self.working_dir)
+
+        with R.start(experiment_name="feature_importance"):
+            model = init_instance_by_config(task["model"])
+            dataset = init_instance_by_config(task["dataset"])
+            model.fit(dataset)
+
+        fi = model.get_feature_importance()
+        # Because the model uses numpy arrays instead of dataframes for training lightgbm,
+        # we must use the following extra steps to map the importances back to the feature names
+        df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
+        cols = df.columns
+        fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
+
+        return pd.Series(fi_named)
+
+    def _dump_data_for_proxy_model(self):
+        """
+        Dump data for training meta model.
+        The meta model will be trained upon the proxy forecasting model.
+        This dataset is for the proxy forecasting model.
+        """
+        topk = 30
+        fi = self._get_feature_importance()
+        col_selected = fi.nlargest(topk)
+        # NOTE: adjusting to `self.sim_task_model` just for aligning with previous implementation.
+        task = self._adjust_task(self.basic_task(enable_handler_cache=False), self.sim_task_model)
+        task = replace_task_handler_with_cache(task, self.working_dir)
+
+        dataset = init_instance_by_config(task["dataset"])
+        prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
+
+        feature_df = prep_ds["feature"]
+        label_df = prep_ds["label"]
+
+        feature_selected = feature_df.loc[:, col_selected.index]
+
+        feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
+            lambda df: (df - df.mean()).div(df.std())
+        )
+        feature_selected = feature_selected.fillna(0.0)
+
+        df_all = {
+            "label": label_df.reindex(feature_selected.index),
+            "feature": feature_selected,
+        }
+        df_all = pd.concat(df_all, axis=1)
+        df_all.to_pickle(self.working_dir / "fea_label_df.pkl")
+
+        # dump data in handler format for aligning the interface
+        handler = DataHandlerLP(
+            data_loader={
+                "class": "qlib.data.dataset.loader.StaticDataLoader",
+                "kwargs": {"config": self.working_dir / "fea_label_df.pkl"},
+            }
+        )
+        handler.to_pickle(self.working_dir / self.proxy_hd, dump_all=True)
+
+    @property
+    def _internal_data_path(self):
+        return self.working_dir / f"internal_data_s{self.step}.pkl"
+
+    def _dump_meta_ipt(self):
+        """
+        Dump data for training meta model.
+        This function will dump the input data for meta model
+        """
+        # According to the experiments, the choice of the model type is very important for achieving good results
+        sim_task = self._adjust_task(self.basic_task(enable_handler_cache=False), astype=self.sim_task_model)
+        sim_task = replace_task_handler_with_cache(sim_task, self.working_dir)
+
+        if self.sim_task_model == "gbdt":
+            sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
+
+        exp_name_sim = f"data_sim_s{self.step}"
+
+        internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
+        internal_data.setup(trainer=TrainerR)
+
+        with self._internal_data_path.open("wb") as f:
+            pickle.dump(internal_data, f)
+
+    def _train_meta_model(self, fill_method="max"):
+        """
+        training a meta model based on a simplified linear proxy model;
+        """
+
+        # 1) leverage the simplified proxy forecasting model to train meta model.
+        # - Only the dataset part is important, in current version of meta model will integrate the
+
+        # the train_start for training meta model does not necessarily align with final rolling
+        train_start = "2008-01-01" if self.train_start is None else self.train_start
+        train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
+        test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+        proxy_forecast_model_task = {
+            # "model": "qlib.contrib.model.linear.LinearModel",
+            "dataset": {
+                "class": "qlib.data.dataset.DatasetH",
+                "kwargs": {
+                    "handler": f"file://{(self.working_dir / self.proxy_hd).absolute()}",
+                    "segments": {
+                        "train": (train_start, train_end),
+                        "test": (test_start, self.basic_task()["dataset"]["kwargs"]["segments"]["test"][1]),
+                    },
+                },
+            },
+            # "record": ["qlib.workflow.record_temp.SignalRecord"]
+        }
+        # the proxy_forecast_model_task will be used to create meta tasks.
+        # The test date of first task will be 2011-01-01. Each test segment will be about 20days
+        # The tasks include all training tasks and test tasks.
+
+        # 2) preparing meta dataset
+        kwargs = dict(
+            task_tpl=proxy_forecast_model_task,
+            step=self.step,
+            segments=0.62,  # keep test period consistent with the dataset yaml
+            trunc_days=1 + self.horizon,
+            hist_step_n=30,
+            fill_method=fill_method,
+            rolling_ext_days=0,
+        )
+        # NOTE:
+        # the input of meta model (internal data) are shared between proxy model and final forecasting model
+        # but their task test segment are not aligned! It worked in my previous experiment.
+        # So the misalignment will not affect the effectiveness of the method.
+        with self._internal_data_path.open("rb") as f:
+            internal_data = pickle.load(f)
+
+        md = MetaDatasetDS(exp_name=internal_data, **kwargs)
+
+        # 3) train and logging meta model
+        with R.start(experiment_name=self.meta_exp_name):
+            R.log_params(**kwargs)
+            mm = MetaModelDS(
+                step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
+            )
+            mm.fit(md)
+            R.save_objects(model=mm)
+
+    @property
+    def _task_path(self):
+        return self.working_dir / f"tasks_s{self.step}.pkl"
+
+    def get_task_list(self):
+        """
+        Leverage meta-model for inference:
+        - Given
+            - baseline tasks
+            - input for meta model(internal data)
+            - meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
+        """
+        # 1) get meta model
+        exp = R.get_exp(experiment_name=self.meta_exp_name)
+        rec = exp.list_recorders(rtype=exp.RT_L)[0]
+        meta_model: MetaModelDS = rec.load_object("model")
+
+        # 2)
+        # we are transferring the knowledge of the meta model to the final forecasting tasks.
+        # Create MetaTaskDataset for the final forecasting tasks
+        # Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
+
+        # 2.1) get previous config
+        param = rec.list_params()
+        trunc_days = int(param["trunc_days"])
+        step = int(param["step"])
+        hist_step_n = int(param["hist_step_n"])
+        fill_method = param.get("fill_method", "max")
+
+        task_l = super().get_task_list()
+
+        # 2.2) create meta dataset for final dataset
+        kwargs = dict(
+            task_tpl=task_l,
+            step=step,
+            segments=0.0,  # all the tasks are for testing
+            trunc_days=trunc_days,
+            hist_step_n=hist_step_n,
+            fill_method=fill_method,
+            task_mode=MetaTask.PROC_MODE_TRANSFER,
+        )
+
+        with self._internal_data_path.open("rb") as f:
+            internal_data = pickle.load(f)
+        mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
+
+        # 3) meta model make inference and get new qlib task
+        new_tasks = meta_model.inference(mds)
+        with self._task_path.open("wb") as f:
+            pickle.dump(new_tasks, f)
+        return new_tasks
+
+    def run(self):
+        # prepare the meta model for rolling ---------
+        # 1) file: handler_proxy.pkl (self.proxy_hd)
+        self._dump_data_for_proxy_model()
+        # 2)
+        # file: internal_data_s20.pkl
+        # mlflow: data_sim_s20, models for calculating meta_ipt
+        self._dump_meta_ipt()
+        # 3) meta model will be stored in `DDG-DA`
+        self._train_meta_model()
+
+        # Run rolling --------------------------------
+        # 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
+        # - the meta inference are done when calling `get_task_list`
+        # 5) load the saved tasks and train model
+        super().run()
diff --git a/qlib/contrib/strategy/optimizer/optimizer.py b/qlib/contrib/strategy/optimizer/optimizer.py
index a70929e27..a5fb76312 100644
--- a/qlib/contrib/strategy/optimizer/optimizer.py
+++ b/qlib/contrib/strategy/optimizer/optimizer.py
@@ -112,7 +112,6 @@ class PortfolioOptimizer(BaseOptimizer):
         return w
 
     def _optimize(self, S: np.ndarray, r: Optional[np.ndarray] = None, w0: Optional[np.ndarray] = None) -> np.ndarray:
-
         # inverse volatility
         if self.method == self.OPT_INV:
             if r is not None:
diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index 30facf3a3..f2b919739 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -522,7 +522,6 @@ class ACStrategy(BaseStrategy):
             _order_amount = min(_order_amount, self.trade_amount[order.stock_id])
 
             if _order_amount > 1e-5:
-
                 _order = Order(
                     stock_id=order.stock_id,
                     amount=_order_amount,
diff --git a/qlib/contrib/strategy/signal_strategy.py b/qlib/contrib/strategy/signal_strategy.py
index cb94017cd..9ba960eeb 100644
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -435,7 +435,6 @@ class EnhancedIndexingStrategy(WeightStrategyBase):
         self._riskdata_cache = {}
 
     def get_risk_data(self, date):
-
         if date in self._riskdata_cache:
             return self._riskdata_cache[date]
 
@@ -462,7 +461,6 @@ class EnhancedIndexingStrategy(WeightStrategyBase):
         return self._riskdata_cache[date]
 
     def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
-
         trade_date = trade_start_time
         pre_date = get_pre_trading_date(trade_date, future=True)  # previous trade date
 
diff --git a/qlib/contrib/tuner/config.py b/qlib/contrib/tuner/config.py
index 6e37f0097..7a8534a20 100644
--- a/qlib/contrib/tuner/config.py
+++ b/qlib/contrib/tuner/config.py
@@ -11,7 +11,6 @@ import os
 
 class TunerConfigManager:
     def __init__(self, config_path):
-
         if not config_path:
             raise ValueError("Config path is invalid.")
         self.config_path = config_path
@@ -58,7 +57,6 @@ class PipelineExperimentConfig:
 
 class OptimizationConfig:
     def __init__(self, config, TUNER_CONFIG_MANAGER):
-
         self.report_type = config.get("report_type", "pred_long")
         if self.report_type not in [
             "pred_long",
diff --git a/qlib/contrib/tuner/pipeline.py b/qlib/contrib/tuner/pipeline.py
index db48c46cf..34977fa55 100644
--- a/qlib/contrib/tuner/pipeline.py
+++ b/qlib/contrib/tuner/pipeline.py
@@ -15,11 +15,9 @@ from ...utils import get_module_by_module_path
 
 
 class Pipeline:
-
     GLOBAL_BEST_PARAMS_NAME = "global_best_params.json"
 
     def __init__(self, tuner_config_manager):
-
         self.logger = get_module_logger("Pipeline", sh_level=logging.INFO)
 
         self.tuner_config_manager = tuner_config_manager
@@ -37,7 +35,6 @@ class Pipeline:
         self.best_tuner_index = None
 
     def run(self):
-
         TimeInspector.set_time_mark()
         for tuner_index, tuner_config in enumerate(self.pipeline_config):
             tuner = self.init_tuner(tuner_index, tuner_config)
@@ -77,7 +74,6 @@ class Pipeline:
         return tuner_class(tuner_config, self.optim_config)
 
     def save_tuner_exp_info(self):
-
         TimeInspector.set_time_mark()
         save_path = os.path.join(self.pipeline_ex_config.tuner_ex_dir, Pipeline.GLOBAL_BEST_PARAMS_NAME)
         with open(save_path, "w") as fp:
diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py
index c183b28ae..7705ce8b7 100644
--- a/qlib/contrib/tuner/tuner.py
+++ b/qlib/contrib/tuner/tuner.py
@@ -24,7 +24,6 @@ from hyperopt import STATUS_OK, STATUS_FAIL
 
 class Tuner:
     def __init__(self, tuner_config, optim_config):
-
         self.logger = get_module_logger("Tuner", sh_level=logging.INFO)
 
         self.tuner_config = tuner_config
@@ -42,7 +41,6 @@ class Tuner:
         self.space = self.setup_space()
 
     def tune(self):
-
         TimeInspector.set_time_mark()
         fmin(
             fn=self.objective,
@@ -84,7 +82,6 @@ class Tuner:
 
 
 class QLibTuner(Tuner):
-
     ESTIMATOR_CONFIG_NAME = "estimator_config.yaml"
     EXP_INFO_NAME = "exp_info.json"
     EXP_RESULT_DIR = "sacred/{}"
@@ -92,7 +89,6 @@ class QLibTuner(Tuner):
     LOCAL_BEST_PARAMS_NAME = "local_best_params.json"
 
     def objective(self, params):
-
         # 1. Setup an config for a specific estimator process
         estimator_path = self.setup_estimator_config(params)
         self.logger.info("Searching params: {} ".format(params))
@@ -120,7 +116,6 @@ class QLibTuner(Tuner):
         return {"loss": res, "status": status}
 
     def fetch_result(self):
-
         # 1. Get experiment information
         exp_info_path = os.path.join(self.ex_dir, QLibTuner.EXP_INFO_NAME)
         with open(exp_info_path) as fp:
@@ -155,7 +150,6 @@ class QLibTuner(Tuner):
             return np.abs(res.values[0] - 1)
 
     def setup_estimator_config(self, params):
-
         estimator_config = copy.deepcopy(self.tuner_config)
         estimator_config["model"].update({"args": params["model_space"]})
         estimator_config["strategy"].update({"args": params["strategy_space"]})
@@ -212,7 +206,6 @@ class QLibTuner(Tuner):
         return space
 
     def save_local_best_params(self):
-
         TimeInspector.set_time_mark()
         local_best_params_path = os.path.join(self.ex_dir, QLibTuner.LOCAL_BEST_PARAMS_NAME)
         with open(local_best_params_path, "w") as fp:
diff --git a/qlib/data/cache.py b/qlib/data/cache.py
index addd28871..3264dcd02 100644
--- a/qlib/data/cache.py
+++ b/qlib/data/cache.py
@@ -583,7 +583,6 @@ class DiskExpressionCache(ExpressionCache):
         r.tofile(str(cache_path))
 
     def update(self, sid, cache_uri, freq: str = "day"):
-
         cp_cache_uri = self.get_cache_dir(freq).joinpath(sid).joinpath(cache_uri)
         meta_path = cp_cache_uri.with_suffix(".meta")
         if not self.check_cache_exists(cp_cache_uri, suffix_list=[".meta"]):
@@ -696,7 +695,6 @@ class DiskDatasetCache(DatasetCache):
     def _dataset(
         self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, inst_processors=[]
     ):
-
         if disk_cache == 0:
             # In this case, data_set cache is configured but will not be used.
             return self.provider.dataset(
@@ -801,7 +799,6 @@ class DiskDatasetCache(DatasetCache):
         KEY = "df"
 
         def __init__(self, cache_path: Union[str, Path]):
-
             self.index_path = cache_path.with_suffix(".index")
             self._data = None
             self.logger = get_module_logger(self.__class__.__name__)
@@ -1126,7 +1123,6 @@ class DatasetURICache(DatasetCache):
     def dataset(
         self, instruments, fields, start_time=None, end_time=None, freq="day", disk_cache=0, inst_processors=[]
     ):
-
         if "local" in C.dataset_provider.lower():
             # use LocalDatasetProvider
             return self.provider.dataset(
@@ -1189,7 +1185,6 @@ class MemoryCalendarCache(CalendarCache):
         uri = self._uri(start_time, end_time, freq, future)
         result, expire = MemCacheExpire.get_cache(H["c"], uri)
         if result is None or expire:
-
             result = self.provider.calendar(start_time, end_time, freq, future)
             MemCacheExpire.set_cache(H["c"], uri, result)
 
diff --git a/qlib/data/data.py b/qlib/data/data.py
index 809b8d1c3..116827f23 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -1096,7 +1096,6 @@ class ClientDatasetProvider(DatasetProvider):
                 else:
                     return data
         else:
-
             """
             Call the server to generate the data-set cache, get the uri of the cache file.
             Then load the data from the file on NFS directly.
diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py
index f7204cf78..63acd937e 100644
--- a/qlib/data/dataset/processor.py
+++ b/qlib/data/dataset/processor.py
@@ -132,7 +132,6 @@ class FilterCol(Processor):
         self.col_list = col_list
 
     def __call__(self, df):
-
         cols = get_group_columns(df, self.fields_group)
         all_cols = df.columns
         diff_cols = np.setdiff1d(all_cols.get_level_values(-1), cols.get_level_values(-1))
diff --git a/qlib/data/dataset/utils.py b/qlib/data/dataset/utils.py
index 4761fb383..f19dfe08f 100644
--- a/qlib/data/dataset/utils.py
+++ b/qlib/data/dataset/utils.py
@@ -71,15 +71,11 @@ def fetch_df_by_index(
     if fetch_orig:
         for slc in idx_slc:
             if slc != slice(None, None):
-                return df.loc[
-                    pd.IndexSlice[idx_slc],
-                ]
+                return df.loc[pd.IndexSlice[idx_slc],]  # noqa: E231
         else:  # pylint: disable=W0120
             return df
     else:
-        return df.loc[
-            pd.IndexSlice[idx_slc],
-        ]
+        return df.loc[pd.IndexSlice[idx_slc],]  # noqa: E231
 
 
 def fetch_df_by_col(df: pd.DataFrame, col_set: Union[str, List[str]]) -> pd.DataFrame:
diff --git a/qlib/data/pit.py b/qlib/data/pit.py
index 093b98cab..33d5e0c5c 100644
--- a/qlib/data/pit.py
+++ b/qlib/data/pit.py
@@ -22,7 +22,6 @@ from .data import Cal
 
 class P(ElemOperator):
     def _load_internal(self, instrument, start_index, end_index, freq):
-
         _calendar = Cal.calendar(freq=freq)
         resample_data = np.empty(end_index - start_index + 1, dtype="float32")
 
diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py
index 288500c55..8a100a2d1 100644
--- a/qlib/data/storage/file_storage.py
+++ b/qlib/data/storage/file_storage.py
@@ -190,7 +190,6 @@ class FileCalendarStorage(FileStorageMixin, CalendarStorage):
 
 
 class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
-
     INSTRUMENT_SEP = "\t"
     INSTRUMENT_START_FIELD = "start_datetime"
     INSTRUMENT_END_FIELD = "end_datetime"
@@ -260,7 +259,6 @@ class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
         return self._read_instrument()[k]
 
     def update(self, *args, **kwargs) -> None:
-
         if len(args) > 1:
             raise TypeError(f"update expected at most 1 arguments, got {len(args)}")
         inst = self._read_instrument()
@@ -358,7 +356,6 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
         storage_end_index = self.end_index
         with self.uri.open("rb") as fp:
             if isinstance(i, int):
-
                 if storage_start_index > i:
                     raise IndexError(f"{i}: start index is {storage_start_index}")
                 fp.seek(4 * (i - storage_start_index) + 4)
diff --git a/qlib/log.py b/qlib/log.py
index 115abc137..f7683d511 100644
--- a/qlib/log.py
+++ b/qlib/log.py
@@ -84,7 +84,6 @@ get_module_logger = _QLibLoggerManager()
 
 
 class TimeInspector:
-
     timer_logger = get_module_logger("timer")
 
     time_marks = []
diff --git a/qlib/model/riskmodel/poet.py b/qlib/model/riskmodel/poet.py
index 8946b2ac5..42388d84c 100644
--- a/qlib/model/riskmodel/poet.py
+++ b/qlib/model/riskmodel/poet.py
@@ -43,7 +43,6 @@ class POETCovEstimator(RiskModel):
         self.thresh_method = thresh_method
 
     def _predict(self, X: np.ndarray) -> np.ndarray:
-
         Y = X.T  # NOTE: to match POET's implementation
         p, n = Y.shape
 
diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py
index 52c924918..97ff00c57 100644
--- a/qlib/tests/__init__.py
+++ b/qlib/tests/__init__.py
@@ -14,7 +14,6 @@ from qlib.data.storage import CalendarStorage, InstrumentStorage, FeatureStorage
 
 
 class TestAutoData(unittest.TestCase):
-
     _setup_kwargs = {}
     provider_uri = "~/.qlib/qlib_data/cn_data_simple"  # target_dir
     provider_uri_1day = "~/.qlib/qlib_data/cn_data"  # target_dir
@@ -286,6 +285,5 @@ class TestMockData(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls) -> None:
-
         provider_uri = "Not necessary."
         init(region=REG_TW, provider_uri=provider_uri, expression_cache=None, dataset_cache=None, **cls._setup_kwargs)
diff --git a/qlib/tests/data.py b/qlib/tests/data.py
index 2163b4bf7..f6bd78090 100644
--- a/qlib/tests/data.py
+++ b/qlib/tests/data.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+import os
 import re
 import sys
 import qlib
@@ -11,13 +12,15 @@ import datetime
 from tqdm import tqdm
 from pathlib import Path
 from loguru import logger
+from cryptography.fernet import Fernet
 from qlib.utils import exists_qlib_data
 
 
 class GetData:
-    DATASET_VERSION = "v2"
     REMOTE_URL = "https://qlibpublic.blob.core.windows.net/data/default/stock_data"
-    QLIB_DATA_NAME = "{dataset_name}_{region}_{interval}_{qlib_version}.zip"
+    # "?" is not included in the token.
+    TOKEN = b"gAAAAABkmDhojHc0VSCDdNK1MqmRzNLeDFXe5hy8obHpa6SDQh4de6nW5gtzuD-fa6O_WZb0yyqYOL7ndOfJX_751W3xN5YB4-n-P22jK-t6ucoZqhT70KPD0Lf0_P328QPJVZ1gDnjIdjhi2YLOcP4BFTHLNYO0mvzszR8TKm9iT5AKRvuysWnpi8bbYwGU9zAcJK3x9EPL43hOGtxliFHcPNGMBoJW4g_ercdhi0-Qgv5_JLsV-29_MV-_AhuaYvJuN2dEywBy"
+    KEY = "EYcA8cgorA8X9OhyMwVfuFxn_1W3jGk6jCbs3L2oPoA="
 
     def __init__(self, delete_zip_file=False):
         """
@@ -29,24 +32,44 @@ class GetData:
         """
         self.delete_zip_file = delete_zip_file
 
-    def normalize_dataset_version(self, dataset_version: str = None):
-        if dataset_version is None:
-            dataset_version = self.DATASET_VERSION
-        return dataset_version
+    def merge_remote_url(self, file_name: str):
+        fernet = Fernet(self.KEY)
+        token = fernet.decrypt(self.TOKEN).decode()
+        return f"{self.REMOTE_URL}/{file_name}?{token}"
 
-    def merge_remote_url(self, file_name: str, dataset_version: str = None):
-        return f"{self.REMOTE_URL}/{self.normalize_dataset_version(dataset_version)}/{file_name}"
+    def download_data(self, file_name: str, target_dir: [Path, str], delete_old: bool = True):
+        """
+        Download the specified file to the target folder.
 
-    def _download_data(
-        self, file_name: str, target_dir: [Path, str], delete_old: bool = True, dataset_version: str = None
-    ):
+        Parameters
+        ----------
+        target_dir: str
+            data save directory
+        file_name: str
+            dataset name, needs to end with .zip, value from [rl_data.zip, csv_data_cn.zip, ...]
+            may contain folder names, for example: v2/qlib_data_simple_cn_1d_latest.zip
+        delete_old: bool
+            delete an existing directory, by default True
+
+        Examples
+        ---------
+        # get rl data
+        python get_data.py download_data --file_name rl_data.zip --target_dir ~/.qlib/qlib_data/rl_data
+        When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/rl_data.zip?{token}
+
+        # get cn csv data
+        python get_data.py download_data --file_name csv_data_cn.zip --target_dir ~/.qlib/csv_data/cn_data
+        When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/csv_data_cn.zip?{token}
+        -------
+
+        """
         target_dir = Path(target_dir).expanduser()
         target_dir.mkdir(exist_ok=True, parents=True)
         # saved file name
-        _target_file_name = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_" + file_name
+        _target_file_name = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_" + os.path.basename(file_name)
         target_path = target_dir.joinpath(_target_file_name)
 
-        url = self.merge_remote_url(file_name, dataset_version)
+        url = self.merge_remote_url(file_name)
         resp = requests.get(url, stream=True, timeout=60)
         resp.raise_for_status()
         if resp.status_code != 200:
@@ -56,7 +79,7 @@ class GetData:
         logger.warning(
             f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
         )
-        logger.info(f"{file_name} downloading......")
+        logger.info(f"{os.path.basename(file_name)} downloading......")
         with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar:
             with target_path.open("wb") as fp:
                 for chunk in resp.iter_content(chunk_size=chunk_size):
@@ -67,8 +90,8 @@ class GetData:
         if self.delete_zip_file:
             target_path.unlink()
 
-    def check_dataset(self, file_name: str, dataset_version: str = None):
-        url = self.merge_remote_url(file_name, dataset_version)
+    def check_dataset(self, file_name: str):
+        url = self.merge_remote_url(file_name)
         resp = requests.get(url, stream=True, timeout=60)
         status = True
         if resp.status_code == 404:
@@ -140,9 +163,11 @@ class GetData:
         ---------
         # get 1d data
         python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/v2/qlib_data_cn_1d_latest.zip?{token}
 
         # get 1min data
         python get_data.py qlib_data --name qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --interval 1min --region cn
+        When this command is run, the data will be downloaded from this link: https://qlibpublic.blob.core.windows.net/data/default/stock_data/v2/qlib_data_cn_1min_latest.zip?{token}
         -------
 
         """
@@ -155,29 +180,12 @@ class GetData:
 
         qlib_version = ".".join(re.findall(r"(\d+)\.+", qlib.__version__))
 
-        def _get_file_name(v):
-            return self.QLIB_DATA_NAME.format(
-                dataset_name=name, region=region.lower(), interval=interval.lower(), qlib_version=v
-            )
+        def _get_file_name_with_version(qlib_version, dataset_version):
+            dataset_version = "v2" if dataset_version is None else dataset_version
+            file_name_with_version = f"{dataset_version}/{name}_{region.lower()}_{interval.lower()}_{qlib_version}.zip"
+            return file_name_with_version
 
-        file_name = _get_file_name(qlib_version)
-        if not self.check_dataset(file_name, version):
-            file_name = _get_file_name("latest")
-        self._download_data(file_name.lower(), target_dir, delete_old, dataset_version=version)
-
-    def csv_data_cn(self, target_dir="~/.qlib/csv_data/cn_data"):
-        """download cn csv data from remote
-
-        Parameters
-        ----------
-        target_dir: str
-            data save directory
-
-        Examples
-        ---------
-        python get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data
-        -------
-
-        """
-        file_name = "csv_data_cn.zip"
-        self._download_data(file_name, target_dir)
+        file_name = _get_file_name_with_version(qlib_version, dataset_version=version)
+        if not self.check_dataset(file_name):
+            file_name = _get_file_name_with_version("latest", dataset_version=version)
+        self.download_data(file_name.lower(), target_dir, delete_old)
diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py
index 910a4c08b..9e63c104a 100644
--- a/qlib/utils/__init__.py
+++ b/qlib/utils/__init__.py
@@ -7,12 +7,9 @@ from __future__ import division
 from __future__ import print_function
 
 import os
-import pickle
 import re
-import sys
 import copy
 import json
-from qlib.typehint import InstConf
 import yaml
 import redis
 import bisect
@@ -22,15 +19,11 @@ import inspect
 import hashlib
 import datetime
 import requests
-import importlib
-import contextlib
 import collections
 import numpy as np
 import pandas as pd
 from pathlib import Path
-from typing import List, Dict, Union, Tuple, Any, Optional, Callable
-from types import ModuleType
-from urllib.parse import urlparse
+from typing import List, Union, Optional, Callable
 from packaging import version
 from .file import get_or_create_path, save_multiple_parts_file, unpack_archive_with_buffer, get_tmp_file_with_buffer
 from ..config import C
@@ -288,182 +281,6 @@ def parse_field(field):
     return field
 
 
-def get_module_by_module_path(module_path: Union[str, ModuleType]):
-    """Load module path
-
-    :param module_path:
-    :return:
-    :raises: ModuleNotFoundError
-    """
-    if module_path is None:
-        raise ModuleNotFoundError("None is passed in as parameters as module_path")
-
-    if isinstance(module_path, ModuleType):
-        module = module_path
-    else:
-        if module_path.endswith(".py"):
-            module_name = re.sub("^[^a-zA-Z_]+", "", re.sub("[^0-9a-zA-Z_]", "", module_path[:-3].replace("/", "_")))
-            module_spec = importlib.util.spec_from_file_location(module_name, module_path)
-            module = importlib.util.module_from_spec(module_spec)
-            sys.modules[module_name] = module
-            module_spec.loader.exec_module(module)
-        else:
-            module = importlib.import_module(module_path)
-    return module
-
-
-def split_module_path(module_path: str) -> Tuple[str, str]:
-    """
-
-    Parameters
-    ----------
-    module_path : str
-        e.g. "a.b.c.ClassName"
-
-    Returns
-    -------
-    Tuple[str, str]
-        e.g. ("a.b.c", "ClassName")
-    """
-    *m_path, cls = module_path.split(".")
-    m_path = ".".join(m_path)
-    return m_path, cls
-
-
-def get_callable_kwargs(config: InstConf, default_module: Union[str, ModuleType] = None) -> (type, dict):
-    """
-    extract class/func and kwargs from config info
-
-    Parameters
-    ----------
-    config : [dict, str]
-        similar to config
-        please refer to the doc of init_instance_by_config
-
-    default_module : Python module or str
-        It should be a python module to load the class type
-        This function will load class from the config['module_path'] first.
-        If config['module_path'] doesn't exists, it will load the class from default_module.
-
-    Returns
-    -------
-    (type, dict):
-        the class/func object and it's arguments.
-
-    Raises
-    ------
-        ModuleNotFoundError
-    """
-    if isinstance(config, dict):
-        key = "class" if "class" in config else "func"
-        if isinstance(config[key], str):
-            # 1) get module and class
-            # - case 1): "a.b.c.ClassName"
-            # - case 2): {"class": "ClassName", "module_path": "a.b.c"}
-            m_path, cls = split_module_path(config[key])
-            if m_path == "":
-                m_path = config.get("module_path", default_module)
-            module = get_module_by_module_path(m_path)
-
-            # 2) get callable
-            _callable = getattr(module, cls)  # may raise AttributeError
-        else:
-            _callable = config[key]  # the class type itself is passed in
-        kwargs = config.get("kwargs", {})
-    elif isinstance(config, str):
-        # a.b.c.ClassName
-        m_path, cls = split_module_path(config)
-        module = get_module_by_module_path(default_module if m_path == "" else m_path)
-
-        _callable = getattr(module, cls)
-        kwargs = {}
-    else:
-        raise NotImplementedError(f"This type of input is not supported")
-    return _callable, kwargs
-
-
-get_cls_kwargs = get_callable_kwargs  # NOTE: this is for compatibility for the previous version
-
-
-def init_instance_by_config(
-    config: InstConf,
-    default_module=None,
-    accept_types: Union[type, Tuple[type]] = (),
-    try_kwargs: Dict = {},
-    **kwargs,
-) -> Any:
-    """
-    get initialized instance with config
-
-    Parameters
-    ----------
-    config : InstConf
-
-    default_module : Python module
-        Optional. It should be a python module.
-        NOTE: the "module_path" will be override by `module` arguments
-
-        This function will load class from the config['module_path'] first.
-        If config['module_path'] doesn't exists, it will load the class from default_module.
-
-    accept_types: Union[type, Tuple[type]]
-        Optional. If the config is a instance of specific type, return the config directly.
-        This will be passed into the second parameter of isinstance.
-
-    try_kwargs: Dict
-        Try to pass in kwargs in `try_kwargs` when initialized the instance
-        If error occurred, it will fail back to initialization without try_kwargs.
-
-    Returns
-    -------
-    object:
-        An initialized object based on the config info
-    """
-    if isinstance(config, accept_types):
-        return config
-
-    if isinstance(config, (str, Path)):
-        if isinstance(config, str):
-            # path like 'file:///<path to pickle file>/obj.pkl'
-            pr = urlparse(config)
-            if pr.scheme == "file":
-                pr_path = os.path.join(pr.netloc, pr.path) if bool(pr.path) else pr.netloc
-                with open(os.path.normpath(pr_path), "rb") as f:
-                    return pickle.load(f)
-        else:
-            with config.open("rb") as f:
-                return pickle.load(f)
-
-    klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)
-
-    try:
-        return klass(**cls_kwargs, **try_kwargs, **kwargs)
-    except (TypeError,):
-        # TypeError for handling errors like
-        # 1: `XXX() got multiple values for keyword argument 'YYY'`
-        # 2: `XXX() got an unexpected keyword argument 'YYY'
-        return klass(**cls_kwargs, **kwargs)
-
-
-@contextlib.contextmanager
-def class_casting(obj: object, cls: type):
-    """
-    Python doesn't provide the downcasting mechanism.
-    We use the trick here to downcast the class
-
-    Parameters
-    ----------
-    obj : object
-        the object to be cast
-    cls : type
-        the target class type
-    """
-    orig_cls = obj.__class__
-    obj.__class__ = cls
-    yield
-    obj.__class__ = orig_cls
-
-
 def compare_dict_value(src_data: dict, dst_data: dict):
     """Compare dict value
 
@@ -744,7 +561,6 @@ def exists_qlib_data(qlib_dir):
             return False
     # check calendar bin
     for _calendar in calendars_dir.iterdir():
-
         if ("_future" not in _calendar.name) and (
             not list(features_dir.rglob(f"*.{_calendar.name.split('.')[0]}.bin"))
         ):
@@ -872,9 +688,9 @@ def get_item_from_obj(config: dict, name_path: str) -> object:
     cur_cfg = config
     for k in name_path.split("."):
         if isinstance(cur_cfg, dict):
-            cur_cfg = cur_cfg[k]
+            cur_cfg = cur_cfg[k]  # may raise KeyError
         elif k.isdigit():
-            cur_cfg = cur_cfg[int(k)]
+            cur_cfg = cur_cfg[int(k)]  # may raise IndexError
         else:
             raise ValueError(f"Error when getting {k} from cur_cfg")
     return cur_cfg
@@ -910,6 +726,21 @@ def fill_placeholder(config: dict, config_extend: dict):
     top = 0
     tail = 1
     item_queue = [config]
+
+    def try_replace_placeholder(value):
+        if value in config_extend.keys():
+            value = config_extend[value]
+        else:
+            m = re.match(r"<(?P<name_path>[^<>]+)>", value)
+            if m is not None:
+                try:
+                    value = get_item_from_obj(config, m.groupdict()["name_path"])
+                except (KeyError, ValueError, IndexError):
+                    get_module_logger("fill_placeholder").info(
+                        f"{value} lookes like a placeholder, but it can't match to any given values"
+                    )
+        return value
+
     while top < tail:
         now_item = item_queue[top]
         top += 1
@@ -917,17 +748,13 @@ def fill_placeholder(config: dict, config_extend: dict):
             item_keys = range(len(now_item))
         elif isinstance(now_item, dict):
             item_keys = now_item.keys()
-        for key in item_keys:
+        for key in item_keys:  # noqa
             if isinstance(now_item[key], (list, dict)):
                 item_queue.append(now_item[key])
                 tail += 1
             elif isinstance(now_item[key], str):
-                if now_item[key] in config_extend.keys():
-                    now_item[key] = config_extend[now_item[key]]
-                else:
-                    m = re.match(r"<(?P<name_path>[^<>]+)>", now_item[key])
-                    if m is not None:
-                        now_item[key] = get_item_from_obj(config, m.groupdict()["name_path"])
+                # If it is a string, try to replace it with placeholder
+                now_item[key] = try_replace_placeholder(now_item[key])
     return config
 
 
@@ -1049,6 +876,15 @@ def fname_to_code(fname: str):
     return fname
 
 
+from .mod import (
+    get_module_by_module_path,
+    split_module_path,
+    get_callable_kwargs,
+    get_cls_kwargs,
+    init_instance_by_config,
+    class_casting,
+)
+
 __all__ = [
     "get_or_create_path",
     "save_multiple_parts_file",
@@ -1056,4 +892,9 @@ __all__ = [
     "get_tmp_file_with_buffer",
     "set_log_with_config",
     "init_instance_by_config",
+    "get_module_by_module_path",
+    "split_module_path",
+    "get_callable_kwargs",
+    "get_cls_kwargs",
+    "class_casting",
 ]
diff --git a/qlib/utils/index_data.py b/qlib/utils/index_data.py
index b62bc02ce..113f9802d 100644
--- a/qlib/utils/index_data.py
+++ b/qlib/utils/index_data.py
@@ -351,7 +351,6 @@ class IndexData(metaclass=index_data_ops_creator):
     loc_idx_cls = LocIndexer
 
     def __init__(self, data: np.ndarray, *indices: Union[List, pd.Index, Index]):
-
         self.data = data
         self.indices = indices
 
diff --git a/qlib/utils/mod.py b/qlib/utils/mod.py
new file mode 100644
index 000000000..e53957260
--- /dev/null
+++ b/qlib/utils/mod.py
@@ -0,0 +1,235 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""
+All module related class, e.g. :
+- importing a module, class
+- walking a module
+- operations on class or module...
+"""
+
+import contextlib
+import importlib
+import os
+from pathlib import Path
+import pickle
+import pkgutil
+import re
+import sys
+from types import ModuleType
+from typing import Any, Dict, List, Tuple, Union
+from urllib.parse import urlparse
+
+from qlib.typehint import InstConf
+
+
+def get_module_by_module_path(module_path: Union[str, ModuleType]):
+    """Load a module from a module object, a dotted module name, or the path of a ".py" file
+
+    :param module_path: an already-loaded module (returned as is), a dotted name, or a path ending with ".py"
+    :return: the loaded module object
+    :raises: ModuleNotFoundError
+    """
+    if module_path is None:
+        raise ModuleNotFoundError("None is passed in as parameters as module_path")
+
+    if isinstance(module_path, ModuleType):
+        module = module_path
+    else:
+        if module_path.endswith(".py"):
+            module_name = re.sub("^[^a-zA-Z_]+", "", re.sub("[^0-9a-zA-Z_]", "", module_path[:-3].replace("/", "_")))
+            module_spec = importlib.util.spec_from_file_location(module_name, module_path)
+            module = importlib.util.module_from_spec(module_spec)
+            sys.modules[module_name] = module  # registered before its code is executed below
+            module_spec.loader.exec_module(module)
+        else:
+            module = importlib.import_module(module_path)
+    return module
+
+
+def split_module_path(module_path: str) -> Tuple[str, str]:
+    """Separate a dotted path into its module part and its trailing attribute name.
+
+    Parameters
+    ----------
+    module_path : str
+        dotted path such as "a.b.c.ClassName"
+
+    Returns
+    -------
+    Tuple[str, str]
+        (module part, name), such as ("a.b.c", "ClassName"); the module part is "" when there is no dot
+    """
+    # everything before the last dot is the module part; the remainder is the name
+    m_path, _sep, cls = module_path.rpartition(".")
+    return m_path, cls
+
+
+def get_callable_kwargs(config: InstConf, default_module: Union[str, ModuleType] = None) -> (type, dict):
+    """
+    extract class/func and kwargs from config info
+
+    Parameters
+    ----------
+    config : [dict, str]
+        a dict holding "class" or "func" (and optionally "module_path" / "kwargs"), or a dotted-name string
+        please refer to the doc of init_instance_by_config
+
+    default_module : Python module or str
+        It should be a python module to load the class type
+        This function will load class from the config['module_path'] first.
+        If config['module_path'] doesn't exist, it will load the class from default_module.
+
+    Returns
+    -------
+    (type, dict):
+        the class/func object and its arguments.
+
+    Raises
+    ------
+        ModuleNotFoundError
+    """
+    if isinstance(config, dict):
+        key = "class" if "class" in config else "func"
+        if isinstance(config[key], str):
+            # 1) get module and class
+            # - case 1): "a.b.c.ClassName"
+            # - case 2): {"class": "ClassName", "module_path": "a.b.c"}
+            m_path, cls = split_module_path(config[key])
+            if m_path == "":
+                m_path = config.get("module_path", default_module)
+            module = get_module_by_module_path(m_path)
+
+            # 2) get callable
+            _callable = getattr(module, cls)  # may raise AttributeError
+        else:
+            _callable = config[key]  # the class type itself is passed in
+        kwargs = config.get("kwargs", {})
+    elif isinstance(config, str):
+        # a.b.c.ClassName
+        m_path, cls = split_module_path(config)
+        module = get_module_by_module_path(default_module if m_path == "" else m_path)
+
+        _callable = getattr(module, cls)
+        kwargs = {}
+    else:
+        raise NotImplementedError(f"This type of input is not supported")
+    return _callable, kwargs
+
+
+get_cls_kwargs = get_callable_kwargs  # NOTE: this is for compatibility for the previous version
+
+
+def init_instance_by_config(
+    config: InstConf,
+    default_module=None,
+    accept_types: Union[type, Tuple[type]] = (),
+    try_kwargs: Dict = {},
+    **kwargs,
+) -> Any:
+    """
+    get initialized instance with config
+
+    Parameters
+    ----------
+    config : InstConf
+
+    default_module : Python module
+        Optional. It should be a python module.
+        NOTE: the "module_path" will be override by `module` arguments
+
+        This function will load class from the config['module_path'] first.
+        If config['module_path'] doesn't exist, it will load the class from default_module.
+
+    accept_types: Union[type, Tuple[type]]
+        Optional. If the config is an instance of specific type, return the config directly.
+        This will be passed into the second parameter of isinstance.
+
+    try_kwargs: Dict
+        Try to pass in kwargs in `try_kwargs` when initializing the instance
+        If a TypeError occurs, it will fall back to initialization without try_kwargs.
+
+    Returns
+    -------
+    object:
+        An initialized object based on the config info
+    """
+    if isinstance(config, accept_types):
+        return config
+
+    if isinstance(config, (str, Path)):
+        if isinstance(config, str):
+            # path like 'file:///<path to pickle file>/obj.pkl'
+            pr = urlparse(config)
+            if pr.scheme == "file":
+                pr_path = os.path.join(pr.netloc, pr.path) if bool(pr.path) else pr.netloc
+                with open(os.path.normpath(pr_path), "rb") as f:
+                    return pickle.load(f)  # NOTE(review): unpickling can run arbitrary code; load trusted files only
+        else:
+            with config.open("rb") as f:
+                return pickle.load(f)  # NOTE(review): unpickling can run arbitrary code; load trusted files only
+
+    klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)
+
+    try:
+        return klass(**cls_kwargs, **try_kwargs, **kwargs)
+    except (TypeError,):
+        # TypeError for handling errors like
+        # 1: `XXX() got multiple values for keyword argument 'YYY'`
+        # 2: `XXX() got an unexpected keyword argument 'YYY'
+        return klass(**cls_kwargs, **kwargs)
+
+
+@contextlib.contextmanager
+def class_casting(obj: object, cls: type):
+    """Downcast `obj` to `cls` temporarily (Python has no native downcasting); its class is restored on exit.
+
+    Parameters
+    ----------
+    obj : object
+        the object to be cast
+    cls : type
+        the target class type
+    """
+    orig_cls = obj.__class__
+    obj.__class__ = cls
+    try:
+        yield
+    finally:  # restore the original class even when the managed block raises
+        obj.__class__ = orig_cls
+
+
+def find_all_classes(module_path: Union[str, ModuleType], cls: type) -> List[type]:
+    """
+    Find all the classes recursively that inherit from `cls` in a given module.
+    - `cls` itself is also included; every class is listed only once
+
+        >>> from qlib.data.dataset.handler import DataHandler
+        >>> find_all_classes("qlib.contrib.data.handler", DataHandler)
+        [<class 'qlib.contrib.data.handler.Alpha158'>, <class 'qlib.contrib.data.handler.Alpha158vwap'>, <class 'qlib.contrib.data.handler.Alpha360'>, <class 'qlib.contrib.data.handler.Alpha360vwap'>, <class 'qlib.data.dataset.handler.DataHandlerLP'>]
+
+    TODO:
+    - skip import error
+
+    """
+    if isinstance(module_path, ModuleType):
+        mod = module_path
+    else:
+        mod = importlib.import_module(module_path)
+
+    cls_list = []
+
+    def _append_cls(obj):
+        # Closure shared by both loops below; skips `obj` when it has already been collected
+        if isinstance(obj, type) and issubclass(obj, cls) and obj not in cls_list:
+            cls_list.append(obj)
+
+    for attr in dir(mod):
+        _append_cls(getattr(mod, attr))
+
+    if hasattr(mod, "__path__"):
+        # if the module is a package
+        for _, modname, _ in pkgutil.iter_modules(mod.__path__):
+            sub_mod = importlib.import_module(f"{mod.__package__}.{modname}")
+            for m_cls in find_all_classes(sub_mod, cls):
+                _append_cls(m_cls)
+    return cls_list
diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py
index 552848395..4f0413274 100644
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -136,7 +136,6 @@ class RecordTemp:
             whether the records are stored properly.
         """
         if include_self:
-
             # Some mlflow backend will not list the directly recursively.
             # So we force to the directly
             artifacts = {}
diff --git a/qlib/workflow/task/gen.py b/qlib/workflow/task/gen.py
index 77bd2cbc1..bd98e501d 100644
--- a/qlib/workflow/task/gen.py
+++ b/qlib/workflow/task/gen.py
@@ -339,7 +339,6 @@ class MultiHorizonGenBase(TaskGen):
     def generate(self, task: dict):
         res = []
         for hr in self.horizon:
-
             # Add horizon
             t = copy.deepcopy(task)
             self.set_horizon(t, hr)
diff --git a/qlib/workflow/task/utils.py b/qlib/workflow/task/utils.py
index a914ea54f..19837b3c7 100644
--- a/qlib/workflow/task/utils.py
+++ b/qlib/workflow/task/utils.py
@@ -1,23 +1,25 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-
 """
 Some tools for task management.
 """
 
 import bisect
+from copy import deepcopy
 import pandas as pd
 from qlib.data import D
+from qlib.utils import hash_args
+from qlib.utils.mod import init_instance_by_config
 from qlib.workflow import R
 from qlib.config import C
 from qlib.log import get_module_logger
 from pymongo import MongoClient
 from pymongo.database import Database
 from typing import Union
+from pathlib import Path
 
 
 def get_mongodb() -> Database:
-
     """
     Get database in MongoDB, which means you need to declare the address and the name of a database at first.
 
@@ -276,3 +278,31 @@ class TimeAdjuster:
             return self.get(start_idx), self.get(end_idx)
         else:
             raise NotImplementedError(f"This type of input is not supported")
+
+
+def replace_task_handler_with_cache(task: dict, cache_dir: Union[str, Path] = ".") -> dict:
+    """
+    Return a deep copy of `task` whose dict handler config is swapped for a `file://` path to a pickled handler.
+    The pickle is written into `cache_dir` (created if missing) unless it already exists; other handlers are kept.
+
+    >>> import qlib
+    >>> qlib.auto_init()
+    >>> import datetime
+    >>> # a simplified task
+    >>> task = {"dataset": {"kwargs":{'handler': {'class': 'Alpha158', 'module_path': 'qlib.contrib.data.handler', 'kwargs': {'start_time': datetime.date(2008, 1, 1), 'end_time': datetime.date(2020, 8, 1), 'fit_start_time': datetime.date(2008, 1, 1), 'fit_end_time': datetime.date(2014, 12, 31), 'instruments': 'CSI300'}}}}}
+    >>> new_task = replace_task_handler_with_cache(task)
+    >>> print(new_task)
+    {'dataset': {'kwargs': {'handler': 'file...Alpha158.3584f5f8b4.pkl'}}}
+    """
+    cache_dir = Path(cache_dir)
+    task = deepcopy(task)  # the caller's task is never mutated
+    handler = task["dataset"]["kwargs"]["handler"]
+    if isinstance(handler, dict):
+        handler_hash = hash_args(handler)  # not named `hash`, which would shadow the builtin
+        h_path = cache_dir / f"{handler['class']}.{handler_hash[:10]}.pkl"
+        if not h_path.exists():
+            h = init_instance_by_config(handler)
+            cache_dir.mkdir(parents=True, exist_ok=True)  # make sure the target directory exists before dumping
+            h.to_pickle(h_path, dump_all=True)
+        task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
+    return task
diff --git a/scripts/check_dump_bin.py b/scripts/check_dump_bin.py
index ef8023219..7ae8a26ab 100644
--- a/scripts/check_dump_bin.py
+++ b/scripts/check_dump_bin.py
@@ -15,7 +15,6 @@ from loguru import logger
 
 
 class CheckBin:
-
     NOT_IN_FEATURES = "not in features"
     COMPARE_FALSE = "compare False"
     COMPARE_TRUE = "compare True"
diff --git a/scripts/data_collector/base.py b/scripts/data_collector/base.py
index e3cf1fcac..386bb1b2c 100644
--- a/scripts/data_collector/base.py
+++ b/scripts/data_collector/base.py
@@ -18,7 +18,6 @@ from qlib.utils import code_to_fname
 
 
 class BaseCollector(abc.ABC):
-
     CACHE_FLAG = "CACHED"
     NORMAL_FLAG = "NORMAL"
 
@@ -185,7 +184,6 @@ class BaseCollector(abc.ABC):
             return self.NORMAL_FLAG
 
     def _collector(self, instrument_list):
-
         error_symbol = []
         res = Parallel(n_jobs=self.max_workers)(
             delayed(self._simple_collector)(_inst) for _inst in tqdm(instrument_list)
diff --git a/scripts/data_collector/br_index/collector.py b/scripts/data_collector/br_index/collector.py
index 0dc12eff6..7d32170f0 100644
--- a/scripts/data_collector/br_index/collector.py
+++ b/scripts/data_collector/br_index/collector.py
@@ -21,7 +21,6 @@ quarter_dict = {"1Q": "01-03", "2Q": "05-01", "3Q": "09-01"}
 
 
 class IBOVIndex(IndexBase):
-
     ibov_index_composition = "https://raw.githubusercontent.com/igor17400/IBOV-HCI/main/historic_composition/{}.csv"
     years_4_month_periods = []
 
diff --git a/scripts/data_collector/us_index/collector.py b/scripts/data_collector/us_index/collector.py
index 97cbce825..cb0c3fc95 100644
--- a/scripts/data_collector/us_index/collector.py
+++ b/scripts/data_collector/us_index/collector.py
@@ -143,7 +143,6 @@ class WIKIIndex(IndexBase):
 
 
 class NASDAQ100Index(WIKIIndex):
-
     HISTORY_COMPANIES_URL = (
         "https://indexes.nasdaqomx.com/Index/WeightingData?id=NDX&tradeDate={trade_date}T00%3A00%3A00.000&timeOfDay=SOD"
     )
diff --git a/scripts/dump_pit.py b/scripts/dump_pit.py
index cda872c09..c328eb67a 100644
--- a/scripts/dump_pit.py
+++ b/scripts/dump_pit.py
@@ -237,7 +237,6 @@ class DumpPitData:
                     pass
 
             with open(data_file, "rb+") as fd, open(index_file, "rb+") as fi:
-
                 # update index if needed
                 for i, row in df_sub.iterrows():
                     # get index
diff --git a/setup.py b/setup.py
index bf533cfe4..86d11dd61 100644
--- a/setup.py
+++ b/setup.py
@@ -80,6 +80,7 @@ REQUIRED = [
     "gym",
     # Installing the latest version of protobuf for python versions below 3.8 will cause unit tests to fail.
     "protobuf<=3.20.1;python_version<='3.8'",
+    "cryptography",
 ]
 
 # Numpy include
diff --git a/tests/backtest/test_high_freq_trading.py b/tests/backtest/test_high_freq_trading.py
index fd934914d..a538464db 100644
--- a/tests/backtest/test_high_freq_trading.py
+++ b/tests/backtest/test_high_freq_trading.py
@@ -27,7 +27,6 @@ class TestHFBacktest(TestAutoData):
         return pd.DataFrame(orders, columns=headers)
 
     def test_trading(self):
-
         # date = "2020-02-03"
         # inst = "SH600068"
         # pos = 2.0167
diff --git a/tests/data_mid_layer_tests/test_handler_storage.py b/tests/data_mid_layer_tests/test_handler_storage.py
index 0d8ad4d57..a8bb730f7 100644
--- a/tests/data_mid_layer_tests/test_handler_storage.py
+++ b/tests/data_mid_layer_tests/test_handler_storage.py
@@ -21,7 +21,6 @@ class TestHandler(DataHandlerLP):
         fit_end_time=None,
         drop_raw=True,
     ):
-
         infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
         learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
 
@@ -51,7 +50,6 @@ class TestHandler(DataHandlerLP):
 
 
 class TestHandlerStorage(TestAutoData):
-
     market = "all"
 
     start_time = "2010-01-01"
@@ -82,7 +80,6 @@ class TestHandlerStorage(TestAutoData):
         )
 
         with TimeInspector.logt("random fetch with DataFrame Storage"):
-
             # single stock
             for i in range(100):
                 random_index = np.random.randint(len(instruments), size=1)[0]
@@ -96,7 +93,6 @@ class TestHandlerStorage(TestAutoData):
                 data_handler.fetch(selector=(fetch_stocks, slice(fetch_start_time, fetch_end_time)), level=None)
 
         with TimeInspector.logt("random fetch with HashingStock Storage"):
-
             # single stock
             for i in range(100):
                 random_index = np.random.randint(len(instruments), size=1)[0]
diff --git a/tests/misc/test_sepdf.py b/tests/misc/test_sepdf.py
index 9fdc0bb2d..76bd0e6bd 100644
--- a/tests/misc/test_sepdf.py
+++ b/tests/misc/test_sepdf.py
@@ -11,7 +11,6 @@ class SepDF(unittest.TestCase):
         return "".join(str(obj).split())
 
     def test_index_data(self):
-
         np.random.seed(42)
 
         index = [
diff --git a/tests/rolling_tests/test_update_pred.py b/tests/rolling_tests/test_update_pred.py
index 324611948..b3ca2e036 100644
--- a/tests/rolling_tests/test_update_pred.py
+++ b/tests/rolling_tests/test_update_pred.py
@@ -77,7 +77,6 @@ class TestRolling(TestAutoData):
 
     @pytest.mark.slow
     def test_update_label(self):
-
         task = copy.deepcopy(CSI300_GBDT_TASK)
 
         task["record"] = {
diff --git a/tests/storage_tests/test_storage.py b/tests/storage_tests/test_storage.py
index 50b16a041..92fed34ec 100644
--- a/tests/storage_tests/test_storage.py
+++ b/tests/storage_tests/test_storage.py
@@ -22,7 +22,6 @@ QLIB_DIR.mkdir(exist_ok=True, parents=True)
 
 class TestStorage(TestAutoData):
     def test_calendar_storage(self):
-
         calendar = CalendarStorage(freq="day", future=False, provider_uri=self.provider_uri)
         assert isinstance(calendar[:], Iterable), f"{calendar.__class__.__name__}.__getitem__(s: slice) is not Iterable"
         assert isinstance(calendar.data, Iterable), f"{calendar.__class__.__name__}.data is not Iterable"
diff --git a/tests/test_dump_data.py b/tests/test_dump_data.py
index dfa7f8556..33cae4e80 100644
--- a/tests/test_dump_data.py
+++ b/tests/test_dump_data.py
@@ -35,7 +35,7 @@ class TestDumpData(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls) -> None:
-        GetData().csv_data_cn(SOURCE_DIR)
+        GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR)
         TestDumpData.DUMP_DATA = DumpDataAll(csv_path=SOURCE_DIR, qlib_dir=QLIB_DIR, include_fields=cls.FIELDS)
         TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv")))
         provider_uri = str(QLIB_DIR.resolve())
diff --git a/tests/test_get_data.py b/tests/test_get_data.py
index 93a852f55..125b9203e 100644
--- a/tests/test_get_data.py
+++ b/tests/test_get_data.py
@@ -33,7 +33,6 @@ class TestGetData(unittest.TestCase):
         shutil.rmtree(str(DATA_DIR.resolve()))
 
     def test_0_qlib_data(self):
-
         GetData().qlib_data(
             name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True
         )
@@ -42,7 +41,7 @@ class TestGetData(unittest.TestCase):
         self.assertFalse(df.dropna().empty, "get qlib data failed")
 
     def test_1_csv_data(self):
-        GetData().csv_data_cn(SOURCE_DIR)
+        GetData().download_data(file_name="csv_data_cn.zip", target_dir=SOURCE_DIR)
         stock_name = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv")))
         self.assertEqual(len(stock_name), 85, "get csv data failed")