Merge branch 'main' of https://github.com/you-n-g/qlib into main

2026-07-21 11:17:34 +08:00 · 2020-11-25 19:41:25 +08:00
parent 5c25f97e64 a99db6a1dc
commit 2c403943b2
29 changed files with 1604 additions and 92 deletions
--- a/README.md
+++ b/README.md
@@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`.
 - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
 - [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
 - [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py)
+- [ALSTM based on pytorch](qlib/contrib/model/pytorch_alstm.py)
 - [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
 - [TabNet based on pytorch](qlib/contrib/model/tabnet.py)
 - [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
-<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
+- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py)
+- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py)

 Your PR of new Quant models is highly welcomed.

--- a/examples/benchmarks/ALSTM/requirements.txt
+++ b/examples/benchmarks/ALSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
@@ -0,0 +1,69 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            seed: 0
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/CatBoost/README.md
+++ b/examples/benchmarks/CatBoost/README.md
@@ -0,0 +1,3 @@
+# CatBoost
+* Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost)
+* Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf).
--- a/examples/benchmarks/DNN/workflow_config_dnn.yaml
+++ b/examples/benchmarks/DNN/workflow_config_dnn.yaml
@@ -30,7 +30,7 @@ task:
        module_path: qlib.contrib.model.pytorch_nn
        kwargs:
            loss: mse
-            input_dim: 360
+            input_dim: 158
            output_dim: 1
            lr: 0.002
            lr_decay: 0.96
--- a/examples/benchmarks/GATs/workflow_config_gats.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats.yaml
@@ -37,9 +37,10 @@ task:
            lr: 1e-3
            early_stop: 20
            batch_size: 800
-            metric: IC
+            metric: loss
            loss: mse
-            base_model: GRU
+            base_model: LSTM
+            with_pretrain: True
            seed: 0
            GPU: 0
    dataset:
--- a/examples/benchmarks/GRU/model_gru_csi300.pkl
+++ b/examples/benchmarks/GRU/model_gru_csi300.pkl
--- a/examples/benchmarks/HATS/requirements.txt
+++ b/examples/benchmarks/HATS/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/HATS/worflow_config_hats.yaml
+++ b/examples/benchmarks/HATS/worflow_config_hats.yaml
@@ -0,0 +1,64 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: HATS
+        module_path: qlib.contrib.model.pytorch_hats
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.6
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            base_model: GRU
+            seed: 0
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LSTM/model_lstm_csi300.pkl
+++ b/examples/benchmarks/LSTM/model_lstm_csi300.pkl
--- a/examples/benchmarks/LightGBM/README.md
+++ b/examples/benchmarks/LightGBM/README.md
@@ -0,0 +1,4 @@
+# LightGBM
+* Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM)
+* Paper: LightGBM: A Highly Efficient Gradient Boosting
+Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf).
--- a/examples/benchmarks/SFM/README.md
+++ b/examples/benchmarks/SFM/README.md
@@ -0,0 +1,4 @@
+# State-Frequency-Memory
+- State Frequency Memory (SFM) is a novel recurrent network that uses Discrete Fourier Transform (DFT) to decompose the hidden states of memory cells and capture the multi-frequency trading patterns from past market data to make stock price predictions. 
+- The code used in Qlib is a PyTorch implementation of SFM (Zhang, L., Aggarwal, C., & Qi, G. J. (2017)).
+- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. https://www.cs.ucf.edu/~gqi/publications/kdd2017_stock.pdf.
--- a/examples/benchmarks/TabNet/README.md
+++ b/examples/benchmarks/TabNet/README.md
@@ -0,0 +1,4 @@
+# TabNet
+* TabNet is a novel high-performance and interpretable canonical deep tabular data learning architecture. TabNet uses sequential attention to choose which features to reason from at each decision step, enabling interpretability and more efficient learning as the learning capacity is used for the most salient features.
+* The code used in Qlib is a PyTorch implementation of TabNet (Arik, S. O., & Pfister, T. (2019)). [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet)
+* Paper: TabNet: Attentive Interpretable Tabular Learning. [https://arxiv.org/pdf/1908.07442.pdf](https://arxiv.org/pdf/1908.07442.pdf).
--- a/examples/benchmarks/XGBoost/README.md
+++ b/examples/benchmarks/XGBoost/README.md
@@ -0,0 +1,3 @@
+# XGBoost
+* Code: [https://github.com/dmlc/xgboost](https://github.com/dmlc/xgboost)
+* Paper: XGBoost: A Scalable Tree Boosting System. [https://dl.acm.org/doi/pdf/10.1145/2939672.2939785](https://dl.acm.org/doi/pdf/10.1145/2939672.2939785).
--- a/examples/workflow_by_code_alstm.py
+++ b/examples/workflow_by_code_alstm.py
@@ -0,0 +1,145 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+import sys
+from pathlib import Path
+
+import qlib
+import pandas as pd
+from qlib.config import REG_CN
+from qlib.contrib.model.pytorch_alstm import ALSTM
+from qlib.contrib.data.handler import ALPHA360_Denoise
+from qlib.contrib.strategy.strategy import TopkDropoutStrategy
+from qlib.contrib.evaluate import (
+    backtest as normal_backtest,
+    risk_analysis,
+)
+from qlib.utils import exists_qlib_data
+
+# from qlib.model.learner import train_model
+from qlib.utils import init_instance_by_config
+
+import pickle
+
+if __name__ == "__main__":
+    # End-to-end example: make sure the CN dataset is present, train an ALSTM model on
+    # Alpha360 features, dump the predictions, backtest a Topk-dropout strategy and print
+    # the risk analysis of the excess return.
+
+    # use default data
+    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+    if not exists_qlib_data(provider_uri):
+        print(f"Qlib data is not found in {provider_uri}")
+        # `get_data` lives in the repository's `scripts` directory, which is not a package,
+        # so that directory is put on `sys.path` before importing from it.
+        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
+        from get_data import GetData
+
+        GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+
+    qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+    MARKET = "csi300"
+    BENCHMARK = "SH000300"
+
+    ###################################
+    # train model
+    ###################################
+    DATA_HANDLER_CONFIG = {
+        "start_time": "2008-01-01",
+        "end_time": "2020-08-01",
+        "fit_start_time": "2008-01-01",
+        "fit_end_time": "2014-12-31",
+        "instruments": MARKET,
+    }
+
+    task = {
+        "model": {
+            "class": "ALSTM",
+            "module_path": "qlib.contrib.model.pytorch_alstm",
+            "kwargs": {
+                "d_feat": 6,
+                "hidden_size": 64,
+                "num_layers": 2,
+                "dropout": 0.0,
+                "n_epochs": 200,
+                "lr": 1e-3,
+                "early_stop": 20,
+                "batch_size": 800,
+                "metric": "IC",
+                "loss": "mse",
+                "seed": 0,
+                "GPU": 0,
+                "rnn_type": "GRU",
+            },
+        },
+        "dataset": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                "handler": {
+                    "class": "ALPHA360_Denoise",
+                    "module_path": "qlib.contrib.data.handler",
+                    "kwargs": DATA_HANDLER_CONFIG,
+                },
+                # the train / valid / test date ranges are defined here
+                "segments": {
+                    "train": ("2008-01-01", "2014-12-31"),
+                    "valid": ("2015-01-01", "2016-12-31"),
+                    "test": ("2017-01-01", "2020-08-01"),
+                },
+            },
+        }
+        # You should record the data in specific sequence
+        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
+    }
+
+    # model = train_model(task)
+    model = init_instance_by_config(task["model"])
+    dataset = init_instance_by_config(task["dataset"])
+    model.fit(dataset)
+
+    pred_score = model.predict(dataset)
+
+    # save pred_score to file
+    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
+    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
+    pred_score.to_pickle(pred_score_path)
+
+    ###################################
+    # backtest
+    ###################################
+    STRATEGY_CONFIG = {
+        "topk": 50,
+        "n_drop": 5,
+    }
+    BACKTEST_CONFIG = {
+        "verbose": False,
+        "limit_threshold": 0.095,
+        "account": 100000000,
+        "benchmark": BENCHMARK,
+        "deal_price": "close",
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5,
+    }
+
+    # use default strategy
+    # custom Strategy, refer to: TODO: Strategy API url
+    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
+    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
+
+    ###################################
+    # analyze
+    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
+    ###################################
+    analysis = dict()
+    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
+    analysis["excess_return_with_cost"] = risk_analysis(
+        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
+    )
+    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
+    print(analysis_df)
--- a/examples/workflow_by_code_gats.py
+++ b/examples/workflow_by_code_gats.py
@@ -70,9 +70,10 @@ if __name__ == "__main__":
                "lr": 1e-3,
                "early_stop": 20,
                "batch_size": 800,
-                "metric": "IC",
+                "metric": "loss",
                "loss": "mse",
-                "base_model": "GRU",
+                "base_model": "LSTM",
+                "with_pretrain": True,
                "seed": 0,
                "GPU": 0,
            },
--- a/examples/workflow_by_code_hats.py
+++ b/examples/workflow_by_code_hats.py
@@ -0,0 +1,145 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
+import sys
+from pathlib import Path
+
+import qlib
+import pandas as pd
+from qlib.config import REG_CN
+from qlib.contrib.model.pytorch_hats import HATS
+from qlib.contrib.data.handler import ALPHA360_Denoise
+from qlib.contrib.strategy.strategy import TopkDropoutStrategy
+from qlib.contrib.evaluate import (
+    backtest as normal_backtest,
+    risk_analysis,
+)
+from qlib.utils import exists_qlib_data
+
+# from qlib.model.learner import train_model
+from qlib.utils import init_instance_by_config
+
+import pickle
+
+if __name__ == "__main__":
+    # End-to-end example: make sure the CN dataset is present, train a HATS model on
+    # Alpha360 features, dump the predictions, backtest a Topk-dropout strategy and print
+    # the risk analysis of the excess return.
+
+    # use default data
+    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+    if not exists_qlib_data(provider_uri):
+        print(f"Qlib data is not found in {provider_uri}")
+        # `get_data` lives in the repository's `scripts` directory, which is not a package,
+        # so that directory is put on `sys.path` before importing from it.
+        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
+        from get_data import GetData
+
+        # Same download call as examples/workflow_by_code_alstm.py.
+        GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+
+    qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+    MARKET = "csi300"
+    BENCHMARK = "SH000300"
+
+    ###################################
+    # train model
+    ###################################
+    DATA_HANDLER_CONFIG = {
+        "start_time": "2008-01-01",
+        "end_time": "2020-08-01",
+        "fit_start_time": "2008-01-01",
+        "fit_end_time": "2014-12-31",
+        "instruments": MARKET,
+    }
+
+    task = {
+        "model": {
+            "class": "HATS",
+            "module_path": "qlib.contrib.model.pytorch_hats",
+            "kwargs": {
+                "d_feat": 6,
+                "hidden_size": 64,
+                "num_layers": 2,
+                "dropout": 0.6,
+                "n_epochs": 200,
+                "lr": 1e-3,
+                "early_stop": 20,
+                "batch_size": 800,
+                "metric": "IC",
+                "loss": "mse",
+                "base_model": "LSTM",
+                "seed": 0,
+                "GPU": 0,
+            },
+        },
+        "dataset": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                "handler": {
+                    "class": "ALPHA360_Denoise",
+                    "module_path": "qlib.contrib.data.handler",
+                    "kwargs": DATA_HANDLER_CONFIG,
+                },
+                # the train / valid / test date ranges are defined here
+                "segments": {
+                    "train": ("2008-01-01", "2014-12-31"),
+                    "valid": ("2015-01-01", "2016-12-31"),
+                    "test": ("2017-01-01", "2020-08-01"),
+                },
+            },
+        }
+        # You should record the data in specific sequence
+        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
+    }
+
+    # model = train_model(task)
+    model = init_instance_by_config(task["model"])
+    dataset = init_instance_by_config(task["dataset"])
+    # NOTE(review): the save path is relative, so it resolves against the current working
+    # directory - presumably the script is meant to be run from `examples/`; confirm.
+    model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl")
+
+    pred_score = model.predict(dataset)
+
+    # save pred_score to file
+    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
+    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
+    pred_score.to_pickle(pred_score_path)
+
+    ###################################
+    # backtest
+    ###################################
+    STRATEGY_CONFIG = {
+        "topk": 50,
+        "n_drop": 5,
+    }
+    BACKTEST_CONFIG = {
+        "verbose": False,
+        "limit_threshold": 0.095,
+        "account": 100000000,
+        "benchmark": BENCHMARK,
+        "deal_price": "close",
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5,
+    }
+
+    # use default strategy
+    # custom Strategy, refer to: TODO: Strategy API url
+    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
+    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
+
+    ###################################
+    # analyze
+    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
+    ###################################
+    analysis = dict()
+    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
+    analysis["excess_return_with_cost"] = risk_analysis(
+        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
+    )
+    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
+    print(analysis_df)
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -190,7 +190,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    Parameters
    ----------

-    # backtest workflow related or commmon arguments
+    - **backtest workflow related or common arguments**
+
    pred : pandas.DataFrame
        predict should has <datetime, instrument> index and one `score` column
    account : float
@@ -202,7 +203,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    verbose : bool
        whether to print log

-    # strategy related arguments
+    - **strategy related arguments**
+
    strategy : Strategy()
        strategy used in backtest
    topk : int (Default value: 50)
@@ -225,7 +227,8 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
    str_type: 'amount', 'weight' or 'dropout'
        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy

-    # exchange related arguments
+    - **exchange related arguments**
+
    exchange: Exchange()
        pass the exchange for speeding up.
    subscribe_fields: list
--- a/qlib/contrib/model/catboost_model.py
+++ b/qlib/contrib/model/catboost_model.py
@@ -1,3 +1,15 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import pandas as pd
 from catboost import Pool, CatBoost
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -0,0 +1,394 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+import pandas as pd
+import copy
+from sklearn.metrics import roc_auc_score, mean_squared_error
+import logging
+from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
+from ...log import get_module_logger, TimeInspector
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+
+
+class ALSTM(Model):
+    """ALSTM Model
+
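+    Parameters
+    ----------
+    d_feat : int
+        number of features per time step
+    hidden_size : int
+        hidden size of the recurrent layers
+    num_layers : int
+        number of stacked recurrent layers
+    dropout : float
+        dropout rate
+    n_epochs : int
+        maximum number of training epochs
+    lr : float
+        learning rate
+    metric : str
+        validation metric used for early stopping: "IC", "loss" or ""
+    batch_size : int
+        number of samples per batch
+    early_stop : int
+        number of epochs without validation improvement before training stops
+    loss : str
+        loss type; "mse" and "binary" are accepted, only "mse" is implemented by `loss_fn`
+    optimizer : str
+        optimizer name: "adam" or "gd"
+    GPU : str
+        the GPU ID(s) used for training
+    seed : int
+        random seed
+    rnn_type : str
+        name of the requested recurrent layer type (e.g. "GRU", "LSTM")
+    """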
+
+    def __init__(
+        self,
+        d_feat=6,
+        hidden_size=64,
+        num_layers=2,
+        dropout=0.0,
+        n_epochs=200,
+        lr=0.001,
+        metric="IC",
+        batch_size=2000,
+        early_stop=20,
+        loss="mse",
+        optimizer="adam",
+        GPU="0",
+        seed=0,
+        rnn_type="GRU",
+        **kwargs
+    ):
+        """Store the hyper-parameters, build the `ALSTMModel` network and its optimizer.
+
+        Raises
+        ------
+        NotImplementedError
+            if `loss` is not "mse"/"binary" or `optimizer` is not "adam"/"gd"
+        """
+        # Set logger.
+        self.logger = get_module_logger("ALSTM")
+        self.logger.info("ALSTM pytorch version...")
+
+        # set hyper-parameters.
+        self.d_feat = d_feat
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.dropout = dropout
+        self.n_epochs = n_epochs
+        self.lr = lr
+        self.metric = metric
+        self.batch_size = batch_size
+        self.early_stop = early_stop
+        self.optimizer = optimizer.lower()
+        self.loss = loss
+        self.visible_GPU = GPU
+        self.use_gpu = torch.cuda.is_available()
+        self.seed = seed
+        self.rnn_type = rnn_type
+
+        self.logger.info(
+            "ALSTM parameters setting:"
+            "\nd_feat : {}"
+            "\nhidden_size : {}"
+            "\nnum_layers : {}"
+            "\ndropout : {}"
+            "\nn_epochs : {}"
+            "\nlr : {}"
+            "\nmetric : {}"
+            "\nbatch_size : {}"
+            "\nearly_stop : {}"
+            "\noptimizer : {}"
+            "\nloss_type : {}"
+            "\nvisible_GPU : {}"
+            "\nuse_GPU : {}"
+            "\nseed : {}"
+            "\nrnn_type : {}".format(
+                d_feat,
+                hidden_size,
+                num_layers,
+                dropout,
+                n_epochs,
+                lr,
+                metric,
+                batch_size,
+                early_stop,
+                optimizer.lower(),
+                loss,
+                GPU,
+                self.use_gpu,
+                seed,
+                self.rnn_type,
+            )
+        )
+
+        if loss not in {"mse", "binary"}:
+            raise NotImplementedError("loss {} is not supported!".format(loss))
+        self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
+
+        # Apply the seed before the network is created so that weight initialisation and the
+        # batch shuffling done with `np.random` are reproducible.
+        if self.seed is not None:
+            np.random.seed(self.seed)
+            torch.manual_seed(self.seed)
+
+        # `rnn_type` has to be forwarded, otherwise the network always uses its own default.
+        self.alstm_model = ALSTMModel(
+            d_feat=self.d_feat,
+            hidden_size=self.hidden_size,
+            num_layers=self.num_layers,
+            dropout=self.dropout,
+            rnn_type=self.rnn_type,
+        )
+
+        if optimizer.lower() == "adam":
+            self.train_optimizer = optim.Adam(self.alstm_model.parameters(), lr=self.lr)
+        elif optimizer.lower() == "gd":
+            self.train_optimizer = optim.SGD(self.alstm_model.parameters(), lr=self.lr)
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
+
+        self._fitted = False
+        if self.use_gpu:
+            self.alstm_model.cuda()
+            # set the visible GPU
+            # `os.environ` only accepts strings, while configs may pass an int (e.g. `GPU: 1`).
+            # NOTE(review): this is assigned after the model was already moved to CUDA -
+            # confirm it still has the intended effect.
+            if self.visible_GPU:
+                os.environ["CUDA_VISIBLE_DEVICES"] = str(self.visible_GPU)
+
+    def mse(self, pred, label):
+        """Return the mean of the element-wise squared difference between `pred` and `label`."""
+        squared_error = (pred - label).pow(2)
+        return squared_error.mean()
+
+    def loss_fn(self, pred, label):
+        """Compute the training loss over the entries whose label is not NaN.
+
+        Only `self.loss == "mse"` is implemented; any other value raises `ValueError`.
+        """
+        # entries with a NaN label are excluded from the loss
+        labelled = torch.isnan(label).logical_not()
+
+        if self.loss != "mse":
+            raise ValueError("unknown loss `%s`" % self.loss)
+
+        return self.mse(pred[labelled], label[labelled])
+
+    def metric_fn(self, pred, label):
+        """Score predictions on the entries with a finite label (higher is better).
+
+        `self.metric == "IC"` returns `cal_ic`; `""` or `"loss"` returns the negated loss;
+        any other value raises `ValueError`.
+        """
+        finite = torch.isfinite(label)
+        kept_pred, kept_label = pred[finite], label[finite]
+
+        if self.metric == "IC":
+            return self.cal_ic(kept_pred, kept_label)
+
+        if self.metric in ("", "loss"):  # use loss
+            # negated so that a larger score is always the better one
+            return -self.loss_fn(kept_pred, kept_label)
+
+        raise ValueError("unknown metric `%s`" % self.metric)
+
+    def cal_ic(self, pred, label):
+        """Return the mean of the element-wise product of `pred` and `label`.
+
+        NOTE(review): this is not a normalised correlation coefficient - confirm that the
+        inputs are expected to be standardised by the caller.
+        """
+        product = pred * label
+        return product.mean()
+
+    def train_epoch(self, x_train, y_train):
+        """Run one optimisation pass over the training data in shuffled mini-batches.
+
+        Labels are multiplied by 100 before the loss is computed. A trailing batch with
+        fewer than `self.batch_size` samples is skipped.
+        """
+        features = x_train.values
+        targets = np.squeeze(y_train.values) * 100
+
+        self.alstm_model.train()
+
+        order = np.arange(len(features))
+        np.random.shuffle(order)
+
+        step = self.batch_size
+        # the upper bound stops before a trailing batch shorter than `step`
+        for start in range(0, len(order) - step + 1, step):
+            picked = order[start : start + step]
+            feature = torch.from_numpy(features[picked]).float()
+            label = torch.from_numpy(targets[picked]).float()
+
+            if self.use_gpu:
+                feature, label = feature.cuda(), label.cuda()
+
+            loss = self.loss_fn(self.alstm_model(feature), label)
+
+            self.train_optimizer.zero_grad()
+            loss.backward()
+            # clamp every gradient element to [-3, 3] before the update
+            torch.nn.utils.clip_grad_value_(self.alstm_model.parameters(), 3.0)
+            self.train_optimizer.step()
+
+    def test_epoch(self, data_x, data_y):
+        """Evaluate the model on `data_x` / `data_y` and return `(mean loss, mean score)`.
+
+        Batches are drawn in shuffled order and a trailing batch with fewer than
+        `self.batch_size` samples is skipped; the loss and the `metric_fn` score are
+        averaged over the evaluated batches. When no full batch exists the means are NaN.
+        """
+        # prepare evaluation data
+        x_values = data_x.values
+        y_values = np.squeeze(data_y.values)
+
+        self.alstm_model.eval()
+
+        scores = []
+        losses = []
+
+        indices = np.arange(len(x_values))
+        np.random.shuffle(indices)
+
+        # Evaluation never back-propagates, so autograd bookkeeping is switched off.
+        with torch.no_grad():
+            for i in range(len(indices))[:: self.batch_size]:
+
+                if len(indices) - i < self.batch_size:
+                    break
+
+                feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float()
+                label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float()
+
+                if self.use_gpu:
+                    feature = feature.cuda()
+                    label = label.cuda()
+
+                pred = self.alstm_model(feature)
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())
+
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())
+
+        return np.mean(losses), np.mean(scores)
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        evals_result=None,
+        verbose=True,
+        save_path=None,
+    ):
+        """Train the model with early stopping and keep the best weights.
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            provides the "train" and "valid" segments with "feature" and "label" columns
+        evals_result : dict, optional
+            receives the per-epoch scores under the keys "train" and "valid"
+        verbose : bool
+            currently unused
+        save_path : str, optional
+            where the best state dict is written with `torch.save`; when None the path is
+            obtained from `create_save_path`
+        """
+        # A new dict per call: a `dict()` default value would be shared between calls.
+        if evals_result is None:
+            evals_result = dict()
+
+        df_train, df_valid, _ = dataset.prepare(
+            ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]
+
+        if save_path is None:
+            save_path = create_save_path(save_path)
+        stop_steps = 0
+        best_score = -np.inf
+        best_epoch = 0
+        # Start from the current weights so `best_param` is defined even when no epoch
+        # improves on `best_score` (e.g. the validation score is NaN, or `n_epochs` is 0).
+        best_param = copy.deepcopy(self.alstm_model.state_dict())
+        evals_result["train"] = []
+        evals_result["valid"] = []
+
+        # train
+        self.logger.info("training...")
+
+        for step in range(self.n_epochs):
+            self.logger.info("Epoch%d:", step)
+            self.logger.info("training...")
+            self.train_epoch(x_train, y_train)
+            self.logger.info("evaluating...")
+            _, train_score = self.test_epoch(x_train, y_train)
+            _, val_score = self.test_epoch(x_valid, y_valid)
+            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
+            evals_result["train"].append(train_score)
+            evals_result["valid"].append(val_score)
+
+            if val_score > best_score:
+                best_score = val_score
+                stop_steps = 0
+                best_epoch = step
+                best_param = copy.deepcopy(self.alstm_model.state_dict())
+            else:
+                stop_steps += 1
+                if stop_steps >= self.early_stop:
+                    self.logger.info("early stop")
+                    break
+
+        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
+        self.alstm_model.load_state_dict(best_param)
+        torch.save(best_param, save_path)
+        # Only mark the model as usable once training has completed without raising.
+        self._fitted = True
+
+        if self.use_gpu:
+            torch.cuda.empty_cache()
+
+    def predict(self, dataset):
+        """Predict on the "test" segment of `dataset`.
+
+        Returns a `pd.Series` of model outputs indexed like the prepared test features.
+        Raises `ValueError` when the model has not been fitted.
+        """
+        if not self._fitted:
+            raise ValueError("model is not fitted yet!")
+
+        x_test = dataset.prepare("test", col_set="feature")
+        features = x_test.values
+        total = features.shape[0]
+        step = self.batch_size
+
+        self.alstm_model.eval()
+        chunks = []
+        # slicing clamps at the end of the array, so the last batch may be shorter
+        for start in range(0, total, step):
+            batch = torch.from_numpy(features[start : start + step]).float()
+            if self.use_gpu:
+                batch = batch.cuda()
+
+            with torch.no_grad():
+                output = self.alstm_model(batch).detach()
+
+            if self.use_gpu:
+                output = output.cpu()
+            chunks.append(output.numpy())
+
+        return pd.Series(np.concatenate(chunks), index=x_test.index)
+
+
+class GRUModel(nn.Module):
+    """GRU stack followed by a linear layer that maps the last hidden state to one value."""
+
+    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0):
+        super().__init__()
+
+        gru_config = dict(
+            input_size=d_feat,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            batch_first=True,
+            dropout=dropout,
+        )
+        self.rnn = nn.GRU(**gru_config)
+        self.fc_out = nn.Linear(hidden_size, 1)
+
+        self.d_feat = d_feat
+
+    def forward(self, x):
+        """Map a flat `[N, F*T]` input to the squeezed output of the final linear layer."""
+        n_samples = len(x)
+        # [N, F*T] -> [N, F, T] -> [N, T, F]
+        sequence = x.reshape(n_samples, self.d_feat, -1).permute(0, 2, 1)
+        hidden_states, _ = self.rnn(sequence)
+        last_state = hidden_states[:, -1, :]
+        return self.fc_out(last_state).squeeze()
+
+
+class ALSTMModel(nn.Module):
+    """Recurrent network with an attention readout that yields one value per sample.
+
+    Each time step is projected by `fc_in` + tanh and fed through the recurrent stack. The
+    last hidden state is concatenated with the attention-weighted sum of all hidden states
+    and mapped to a single output by `fc_out`.
+
+    Parameters
+    ----------
+    d_feat : int
+        number of features per time step
+    hidden_size : int
+        width of the input projection and of the recurrent layers
+    num_layers : int
+        number of stacked recurrent layers
+    dropout : float
+        dropout rate used by the recurrent stack and the attention net
+    rnn_type : str
+        name of the `torch.nn` recurrent class to use (looked up upper-cased, e.g. "GRU")
+    """
+
+    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
+        super().__init__()
+        self.hid_size = hidden_size
+        self.input_size = d_feat
+        self.dropout = dropout
+        self.rnn_type = rnn_type
+        self.rnn_layer = num_layers
+        self._build_model()
+
+    def _build_model(self):
+        """Create the sub-modules; raises `ValueError` for an unknown `rnn_type`."""
+        try:
+            klass = getattr(nn, self.rnn_type.upper())
+        except AttributeError as err:
+            # covers both a missing `torch.nn` class and a non-string `rnn_type`
+            raise ValueError("unknown rnn_type `%s`" % self.rnn_type) from err
+        self.net = nn.Sequential()
+        self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
+        self.net.add_module("act", nn.Tanh())
+        self.rnn = klass(
+            input_size=self.hid_size,
+            hidden_size=self.hid_size,
+            num_layers=self.rnn_layer,
+            batch_first=True,
+            dropout=self.dropout,
+        )
+        # the head sees the last hidden state concatenated with the attention summary
+        self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
+        att_hidden = self.hid_size // 2
+        self.att_net = nn.Sequential()
+        self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=att_hidden))
+        self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
+        self.att_net.add_module("att_act", nn.Tanh())
+        self.att_net.add_module("att_fc_out", nn.Linear(in_features=att_hidden, out_features=1, bias=False))
+        # softmax over dim 1 (the time axis) turns the scores into weights
+        self.att_net.add_module("att_softmax", nn.Softmax(dim=1))
+
+    def forward(self, inputs):
+        """Map a flat `[batch, input_size * seq_len]` tensor to a `[batch]` output."""
+        # `reshape` (unlike `view`) also accepts non-contiguous inputs
+        inputs = inputs.reshape(len(inputs), self.input_size, -1)
+        inputs = inputs.permute(0, 2, 1)  # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
+        rnn_out, _ = self.rnn(self.net(inputs))  # [batch, seq_len, hidden_size]
+        attention_score = self.att_net(rnn_out)  # [batch, seq_len, 1]
+        out_att = torch.mul(rnn_out, attention_score)
+        out_att = torch.sum(out_att, dim=1)  # [batch, hidden_size]
+        out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1))  # [batch, 1]
+        return out[..., 0]
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -55,6 +55,7 @@ class GAT(Model):
        early_stop=20,
        loss="mse",
        base_model="GRU",
+        with_pretrain=True,
        optimizer="adam",
        GPU="0",
        seed=0,
@@ -77,6 +78,7 @@ class GAT(Model):
        self.optimizer = optimizer.lower()
        self.loss = loss
        self.base_model = base_model
+        self.with_pretrain = with_pretrain
        self.visible_GPU = GPU
        self.use_gpu = torch.cuda.is_available()
        self.seed = seed
@@ -95,6 +97,7 @@ class GAT(Model):
            "\noptimizer : {}"
            "\nloss_type : {}"
            "\nbase_model : {}"
+            "\nwith_pretrain : {}"
            "\nvisible_GPU : {}"
            "\nuse_GPU : {}"
            "\nseed : {}".format(
@@ -110,6 +113,7 @@ class GAT(Model):
                optimizer.lower(),
                loss,
                base_model,
+                with_pretrain,
                GPU,
                self.use_gpu,
                seed,
@@ -256,6 +260,25 @@ class GAT(Model):
        evals_result["train"] = []
        evals_result["valid"] = []

+        # load pretrained base_model
+        if self.with_pretrain:
+            self.logger.info("Loading pretrained model...")
+            if self.base_model == "LSTM":
+                from ...contrib.model.pytorch_lstm import LSTMModel
+
+                pretrained_model = LSTMModel()
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
+            elif self.base_model == "GRU":
+                from ...contrib.model.pytorch_gru import GRUModel
+
+                pretrained_model = GRUModel()
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
+            model_dict = self.GAT_model.state_dict()
+            pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
+            model_dict.update(pretrained_dict)
+            self.GAT_model.load_state_dict(model_dict)
+            self.logger.info("Loading pretrained model Done...")
+
        # train
        self.logger.info("training...")
        self._fitted = True
--- a/qlib/contrib/model/pytorch_hats.py
+++ b/qlib/contrib/model/pytorch_hats.py
@@ -0,0 +1,504 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+import pandas as pd
+import copy
+from sklearn.metrics import roc_auc_score, mean_squared_error
+import logging
+from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
+from ...log import get_module_logger, TimeInspector
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+
+
+class HATS(Model):
+    """HATS Model
+
+    Parameters
+    ----------
+    d_feat : int
+        input feature dimension at each time step
+    hidden_size : int
+        hidden size of the base RNN
+    num_layers : int
+        number of layers of the base RNN
+    lr : float
+        learning rate
+    optimizer : str
+        optimizer name
+    GPU : str
+        the GPU ID(s) used for training
+    """
+
+    def __init__(
+        self,
+        d_feat=6,
+        hidden_size=64,
+        num_layers=2,
+        dropout=0.5,
+        n_epochs=200,
+        lr=0.01,
+        metric="IC",
+        batch_size=800,
+        early_stop=20,
+        loss="mse",
+        base_model="GRU",
+        with_pretrain=True,
+        optimizer="adam",
+        GPU="0",
+        seed=0,
+        **kwargs
+    ):
+        # Set logger.
+        self.logger = get_module_logger("HATS")
+        self.logger.info("HATS pytorch version...")
+
+        # set hyper-parameters.
+        self.d_feat = d_feat
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.dropout = dropout
+        self.n_epochs = n_epochs
+        self.lr = lr
+        self.metric = metric
+        self.batch_size = batch_size
+        self.early_stop = early_stop
+        self.optimizer = optimizer.lower()
+        self.loss = loss
+        self.base_model = base_model
+        self.with_pretrain = with_pretrain  # if True, fit() first loads pretrained base-model weights
+        self.visible_GPU = GPU
+        self.use_gpu = torch.cuda.is_available()
+        self.seed = seed
+
+        self.logger.info(
+            "HATS parameters setting:"
+            "\nd_feat : {}"
+            "\nhidden_size : {}"
+            "\nnum_layers : {}"
+            "\ndropout : {}"
+            "\nn_epochs : {}"
+            "\nlr : {}"
+            "\nmetric : {}"
+            "\nbatch_size : {}"
+            "\nearly_stop : {}"
+            "\noptimizer : {}"
+            "\nloss_type : {}"
+            "\nbase_model : {}"
+            "\nwith_pretrain : {}"  ##### debug
+            "\nvisible_GPU : {}"
+            "\nuse_GPU : {}"
+            "\nseed : {}".format(
+                d_feat,
+                hidden_size,
+                num_layers,
+                dropout,
+                n_epochs,
+                lr,
+                metric,
+                batch_size,
+                early_stop,
+                optimizer.lower(),
+                loss,
+                base_model,
+                with_pretrain,  ### debug
+                GPU,
+                self.use_gpu,
+                seed,
+            )
+        )
+
+        if loss not in {"mse", "binary"}:
+            raise NotImplementedError("loss {} is not supported!".format(loss))
+        self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
+
+        self.HATS_model = HATSModel(
+            d_feat=self.d_feat,
+            hidden_size=self.hidden_size,
+            num_layers=self.num_layers,
+            dropout=self.dropout,
+            base_model=self.base_model,
+        )
+        if optimizer.lower() == "adam":
+            self.train_optimizer = optim.Adam(self.HATS_model.parameters(), lr=self.lr)
+        elif optimizer.lower() == "gd":
+            self.train_optimizer = optim.SGD(self.HATS_model.parameters(), lr=self.lr)
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
+
+        self._fitted = False
+        if self.use_gpu:
+            self.HATS_model.cuda()
+            # set the visible GPU
+            if self.visible_GPU:
+                os.environ["CUDA_VISIBLE_DEVICES"] = self.visible_GPU
+
+    def mse(self, pred, label):
+        loss = (pred - label) ** 2
+        return torch.mean(loss)
+
+    def loss_fn(self, pred, label):
+        mask = ~torch.isnan(label)
+
+        if self.loss == "mse":
+            return self.mse(pred[mask], label[mask])
+
+        raise ValueError("unknown loss `%s`" % self.loss)
+
+    def metric_fn(self, pred, label):
+
+        mask = torch.isfinite(label)
+        if self.metric == "IC":
+            return self.cal_ic(pred[mask], label[mask])
+
+        if self.metric == "" or self.metric == "loss":  # use loss
+            return -self.loss_fn(pred[mask], label[mask])
+
+        raise ValueError("unknown metric `%s`" % self.metric)
+
+    def cal_ic(self, pred, label):
+        return torch.mean(pred * label)
+
+    def train_epoch(self, x_train, y_train):
+
+        x_train_values = x_train.values
+        y_train_values = np.squeeze(y_train.values) * 100
+
+        self.HATS_model.train()
+
+        indices = np.arange(len(x_train_values))
+        np.random.shuffle(indices)
+
+        for i in range(len(indices))[:: self.batch_size]:
+
+            if len(indices) - i < self.batch_size:
+                break
+
+            feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float()
+            label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float()
+
+            if self.use_gpu:
+                feature = feature.cuda()
+                label = label.cuda()
+
+            pred = self.HATS_model(feature)
+            loss = self.loss_fn(pred, label)
+
+            self.train_optimizer.zero_grad()
+            loss.backward()
+            torch.nn.utils.clip_grad_value_(self.HATS_model.parameters(), 3.0)
+            self.train_optimizer.step()
+
+    def test_epoch(self, data_x, data_y):
+
+        # prepare evaluation data
+        x_values = data_x.values
+        y_values = np.squeeze(data_y.values)
+
+        self.HATS_model.eval()
+
+        scores = []
+        losses = []
+
+        indices = np.arange(len(x_values))
+        np.random.shuffle(indices)
+
+        for i in range(len(indices))[:: self.batch_size]:
+
+            if len(indices) - i < self.batch_size:
+                break
+
+            feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float()
+            label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float()
+
+            if self.use_gpu:
+                feature = feature.cuda()
+                label = label.cuda()
+
+            pred = self.HATS_model(feature)
+            loss = self.loss_fn(pred, label)
+            losses.append(loss.item())
+
+            score = self.metric_fn(pred, label)
+            scores.append(score.item())
+
+        return np.mean(losses), np.mean(scores)
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        evals_result=dict(),
+        verbose=True,
+        save_path=None,
+    ):
+
+        df_train, df_valid, df_test = dataset.prepare(
+            ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]
+
+        if save_path == None:
+            save_path = create_save_path(save_path)
+        stop_steps = 0
+        train_loss = 0
+        best_score = -np.inf
+        best_epoch = 0
+        evals_result["train"] = []
+        evals_result["valid"] = []
+
+        # load pretrained base_model
+        if self.with_pretrain:
+            self.logger.info("loading pretrained model...")
+            if self.base_model == "LSTM":
+                from ...contrib.model.pytorch_lstm import LSTMModel
+
+                pretrained_model = LSTMModel()
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
+            elif self.base_model == "GRU":
+                from ...contrib.model.pytorch_gru import GRUModel
+
+                pretrained_model = GRUModel()
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
+            model_dict = self.HATS_model.state_dict()
+
+            # filter unnecessary parameters
+            pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
+            # overwrite entries in the existing state dict
+            model_dict.update(pretrained_dict)
+            # load the new state dict
+            self.HATS_model.load_state_dict(model_dict)
+            self.logger.info("loading pretrained model Done...")
+
+        # train
+        self.logger.info("training...")
+        self._fitted = True
+        # return
+
+        for step in range(self.n_epochs):
+            self.logger.info("Epoch%d:", step)
+            self.logger.info("training...")
+            self.train_epoch(x_train, y_train)
+            self.logger.info("evaluating...")
+            train_loss, train_score = self.test_epoch(x_train, y_train)
+            val_loss, val_score = self.test_epoch(x_valid, y_valid)
+            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
+            evals_result["train"].append(train_score)
+            evals_result["valid"].append(val_score)
+
+            if val_score > best_score:
+                best_score = val_score
+                stop_steps = 0
+                best_epoch = step
+                best_param = copy.deepcopy(self.HATS_model.state_dict())
+            else:
+                stop_steps += 1
+                if stop_steps >= self.early_stop:
+                    self.logger.info("early stop")
+                    break
+
+        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
+        self.HATS_model.load_state_dict(best_param)
+        torch.save(best_param, save_path)
+
+        if self.use_gpu:
+            torch.cuda.empty_cache()
+
+    def predict(self, dataset):
+        if not self._fitted:
+            raise ValueError("model is not fitted yet!")
+
+        x_test = dataset.prepare("test", col_set="feature")
+        index = x_test.index
+        self.HATS_model.eval()
+        x_values = x_test.values
+        sample_num = x_values.shape[0]
+        preds = []
+
+        for begin in range(sample_num)[:: self.batch_size]:
+
+            if sample_num - begin < self.batch_size:
+                end = sample_num
+            else:
+                end = begin + self.batch_size
+
+            x_batch = torch.from_numpy(x_values[begin:end]).float()
+
+            if self.use_gpu:
+                x_batch = x_batch.cuda()
+
+            with torch.no_grad():
+                if self.use_gpu:
+                    pred = self.HATS_model(x_batch).detach().cpu().numpy()
+                else:
+                    pred = self.HATS_model(x_batch).detach().numpy()
+
+            preds.append(pred)
+
+        return pd.Series(np.concatenate(preds), index=index)
+
+
+class HATSModel(nn.Module):
+    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
+        super().__init__()
+
+        if base_model == "GRU":
+            self.model = nn.GRU(
+                input_size=d_feat,
+                hidden_size=hidden_size,
+                num_layers=num_layers,
+                batch_first=True,
+                dropout=dropout,
+            )
+        elif base_model == "LSTM":
+            self.model = nn.LSTM(
+                input_size=d_feat,
+                hidden_size=hidden_size,
+                num_layers=num_layers,
+                batch_first=True,
+                dropout=dropout,
+            )
+        else:
+            raise ValueError("unknown base model name `%s`" % base_model)
+
+        self.hidden_size = hidden_size
+        self.bn1 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
+        self.fc = nn.Linear(hidden_size, hidden_size)
+        self.bn2 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
+        self.fc_out = nn.Linear(hidden_size, 1)
+        self.leaky_relu = nn.LeakyReLU()
+        self.softmax = nn.Softmax(dim=1)
+        self.d_feat = d_feat
+
+        num_head_att = [1] * num_layers
+        hidden_dim = [hidden_size] * num_layers
+        dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
+        in_dims = dims[:-1]
+        out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)]
+        self.attn = nn.ModuleList(
+            [GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)]
+        )
+        self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]])
+        self.dropout = nn.Dropout(dropout)
+        self.elu = nn.ELU()
+
+    def forward(self, x):
+        x = x.reshape(len(x), self.d_feat, -1)  # [N, F, T]
+        x = x.permute(0, 2, 1)  # [N, T, F]
+        out, _ = self.model(x)
+        hidden = out[:, -1, :]
+        hidden = self.bn1(hidden)
+        attention = GraphAttention.cal_attention(hidden, hidden)
+        output = attention.mm(hidden)
+        output = self.fc(output)
+        output = self.bn2(output)
+        output = self.leaky_relu(output)
+        return self.fc_out(output).squeeze()
+
+
+class GraphAttention(nn.Module):
+    def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):
+
+        super().__init__()
+
+        """
+        Parameters
+        ----------
+        input_dim : int
+            Dimension of input node features.
+        output_dim : int
+            Dimension of output node features.
+        num_heads : int
+            Number of attention heads in this layer; one linear projection and one attention scorer are created per head.
+        dropout : float
+            Dropout rate. Default: 0.5.
+        """
+
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.num_heads = num_heads
+
+        self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
+        self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])
+
+        self.dropout = nn.Dropout(dropout)
+        self.softmax = nn.Softmax(dim=0)
+        self.leakyrelu = nn.LeakyReLU()
+
+    def forward(self, features, nodes, mapping, rows):
+
+        """
+        Parameters
+        ----------
+        features : torch.Tensor
+            An (n' x input_dim) tensor of input node features.
+        nodes : sequence of node ids
+            The nodes whose output features are computed; nodes[i] is paired
+            with rows[i].
+        mapping : dict
+            Maps a node id to its row position in `features`; it is applied
+            to every entry of `nodes` and `rows`. For example,
+            if the node ids are [2, 5], then mapping[2] = 0 and
+            mapping[5] = 1.
+        rows : numpy array
+            rows[i] is an array of neighbors of nodes[i].
+        Returns
+        -------
+        out : list of torch.Tensor
+            One (len(nodes) x output_dim) tensor of output node features per attention head.
+        """
+
+        nprime = features.shape[0]
+        rows = [np.array([mapping[v] for v in row], dtype=np.int64) for row in rows]
+        sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
+        mapped_nodes = [mapping[v] for v in nodes]
+        indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t()
+
+        out = []
+        for k in range(self.num_heads):
+            h = self.fcs[k](features)
+
+            nbr_h = torch.cat(tuple([h[row] for row in rows]), dim=0)
+            self_h = torch.cat(tuple([h[mapping[nodes[i]]].repeat(len(row), 1) for (i, row) in enumerate(rows)]), dim=0)
+            cat_h = torch.cat((self_h, nbr_h), dim=1)
+
+            e = self.leakyrelu(self.a[k](cat_h))
+
+            alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
+            alpha = torch.cat(tuple(alpha), dim=0)
+            alpha = alpha.squeeze(1)
+            alpha = self.dropout(alpha)
+
+            adj = torch.sparse.FloatTensor(indices, alpha, torch.Size([nprime, nprime]))
+            out.append(torch.sparse.mm(adj, h)[mapped_nodes])
+
+        return out
+
+    def cal_attention(x, y):
+
+        att_x = torch.mean(x, dim=1).reshape(-1, 1)
+        att_y = torch.mean(y, dim=1).reshape(-1, 1)
+        att = att_x.mm(torch.t(att_y))
+        x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
+        y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
+        return (
+            torch.mean(
+                x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
+                * y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1),
+                dim=2,
+            )
+            - att
+        )
--- a/qlib/contrib/model/xgboost.py
+++ b/qlib/contrib/model/xgboost.py
@@ -1,5 +1,14 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import numpy as np
 import pandas as pd
--- a/qlib/contrib/strategy/strategy.py
+++ b/qlib/contrib/strategy/strategy.py
@@ -26,7 +26,9 @@ class BaseStrategy:

    def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
        """
-        Parameters:
+        DO NOT directly change the state of current
+
+        Parameters
        -----------
        score_series : pd.Seires
            stock_id , score
@@ -39,14 +41,12 @@ class BaseStrategy:
            predict date
        trade_date : pd.Timestamp
            trade date
-
-        DO NOT directly change the state of current
        """
        pass

    def update(self, score_series, pred_date, trade_date):
        """User can use this method to update strategy state each trade date.
-        Parameters:
+        Parameters
        -----------
        score_series : pd.Series
            stock_id , score
@@ -98,8 +98,9 @@ class AdjustTimer:
    """AdjustTimer
    Responsible for timing of position adjusting

-    This is designed as multiple inheritance mechanism due to
+    This is designed as multiple inheritance mechanism due to:
    - the is_adjust may need access to the internel state of a strategy
+
    - it can be reguard as a enhancement to the existing strategy
    """

@@ -140,21 +141,24 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):

    def generate_target_weight_position(self, score, current, trade_date):
        """
-        Parameters:
+        Generate target position from score for this date and the current position. The cash is not considered in the position.
+
+        Parameters
        -----------
-        score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
-        current : current position, use Position() class
+        score : pd.Series
+            pred score for this trade date, index is stock_id, contain 'score' column
+        current : Position()
+            current position
        trade_exchange : Exchange()
-        trade_date : trade date
-        generate target position from score for this date and the current position
-        The cash is not considered in the position
+        trade_date : pd.Timestamp
+            trade date
        """
        raise NotImplementedError()

    def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
        """
-        Parameters:
-        ----------
+        Parameters
+        -----------
        score_series : pd.Seires
            stock_id , score
        current : Position()
@@ -186,16 +190,29 @@ class WeightStrategyBase(BaseStrategy, AdjustTimer):


 class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
-    def __init__(self, topk, n_drop, method="bottom", risk_degree=0.95, thresh=1, hold_thresh=1, **kwargs):
+    def __init__(
+        self,
+        topk,
+        n_drop,
+        method_sell="bottom",
+        method_buy="top",
+        risk_degree=0.95,
+        thresh=1,
+        hold_thresh=1,
+        only_tradable=False,
+        **kwargs,
+    ):
        """
-        Parameters:
+        Parameters
        -----------
        topk : int
            The number of stocks in the portfolio
        n_drop : int
            number of stocks to be replaced in each trading date
-        method : str
-            dropout method, random/bottom
+        method_sell : str
+            dropout method_sell, random/bottom
+        method_buy : str
+            dropout method_buy, random/top
        risk_degree : float
            position percentage of total value
        thresh : int
@@ -203,12 +220,19 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        hold_thresh : int
            minimum holding days
            before sell stock , will check current.get_stock_count(order.stock_id) >= self.thresh
+        only_tradable : bool
+            will the strategy only consider the tradable stock when buying and selling.
+            if only_tradable:
+                strategy will make decisions using the tradable state of each stock and avoid buying or selling untradable stocks
+            else:
+                strategy will make buy and sell decisions without checking the tradable state of the stock
        """
        super(TopkDropoutStrategy, self).__init__()
        ListAdjustTimer.__init__(self, kwargs.get("adjust_dates", None))
        self.topk = topk
        self.n_drop = n_drop
-        self.method = method
+        self.method_sell = method_sell
+        self.method_buy = method_buy
        self.risk_degree = risk_degree
        self.thresh = thresh
        # self.stock_count['code'] will be the days the stock has been hold
@@ -216,6 +240,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        self.stock_count = {}

        self.hold_thresh = hold_thresh
+        self.only_tradable = only_tradable

    def get_risk_degree(self, date):
        """get_risk_degree
@@ -229,7 +254,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        """
        Gnererate order list according to score_series at trade_date, will not change current.

-        Parameters:
+        Parameters
        -----------
        score_series : pd.Series
            stock_id , score
@@ -244,24 +269,85 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        """
        if not self.is_adjust(trade_date):
            return []
+
+        if self.only_tradable:
+            # If the strategy only considers tradable stocks when making decisions,
+            # the following helpers filter out untradable stocks
+            def get_first_n(l, n, reverse=False):
+                cur_n = 0
+                res = []
+                for si in reversed(l) if reverse else l:
+                    if trade_exchange.is_stock_tradable(stock_id=si, trade_date=trade_date):
+                        res.append(si)
+                        cur_n += 1
+                        if cur_n >= n:
+                            break
+                return res[::-1] if reverse else res
+
+            def get_last_n(l, n):
+                return get_first_n(l, n, reverse=True)
+
+            def filter_stock(l):
+                return [si for si in l if trade_exchange.is_stock_tradable(stock_id=si, trade_date=trade_date)]
+
+        else:
+            # Otherwise, the strategy makes decisions without the stock tradable info
+            def get_first_n(l, n):
+                return list(l)[:n]
+
+            def get_last_n(l, n):
+                return list(l)[-n:]
+
+            def filter_stock(l):
+                return l
+
        current_temp = copy.deepcopy(current)
        # generate order list for this adjust date
        sell_order_list = []
        buy_order_list = []
        # load score
+        cash = current_temp.get_cash()
        current_stock_list = current_temp.get_stock_list()
+        # last position (sorted by score)
        last = score_series.reindex(current_stock_list).sort_values(ascending=False).index
-        today = (
-            score_series[~score_series.index.isin(last)]
-            .sort_values(ascending=False)
-            .index[: self.n_drop + self.topk - len(last)]
-        )
-        comb = score_series.reindex(last.union(today)).sort_values(ascending=False).index
-        if self.method == "bottom":
-            sell = last[last.isin(comb[-self.n_drop :])]
-        elif self.method == "random":
-            sell = pd.Index(np.random.choice(last, self.n_drop) if len(last) else [])
+        # The new stocks we want to buy today (**at most**)
+        if self.method_buy == "top":
+            today = get_first_n(
+                score_series[~score_series.index.isin(last)].sort_values(ascending=False).index,
+                self.n_drop + self.topk - len(last),
+            )
+        elif self.method_buy == "random":
+            topk_candi = get_first_n(score_series.sort_values(ascending=False).index, self.topk)
+            candi = list(filter(lambda x: x not in last, topk_candi))
+            n = self.n_drop + self.topk - len(last)
+            try:
+                today = np.random.choice(candi, n, replace=False)
+            except ValueError:
+                today = candi
+        else:
+            raise NotImplementedError(f"This type of input is not supported")
+        # combine (new stocks + last stocks); we will drop stocks from this list
+        # to avoid dropping a higher-score stock and buying a lower-score stock.
+        comb = score_series.reindex(last.union(pd.Index(today))).sort_values(ascending=False).index
+
+        # Get the stock list we really want to sell (After filtering the case that we sell high and buy low)
+        if self.method_sell == "bottom":
+            sell = last[last.isin(get_last_n(comb, self.n_drop))]
+        elif self.method_sell == "random":
+            candi = filter_stock(last)
+            try:
+                sell = pd.Index(np.random.choice(candi, self.n_drop, replace=False) if len(last) else [])
+            except ValueError:  # Not enough candidates
+                sell = candi
+        else:
+            raise NotImplementedError(f"This type of input is not supported")
+
+        # Get the stock list we really want to buy
        buy = today[: len(sell) + self.topk - len(last)]
+
+        # buy signal: if a stock falls into topk, it appears in buy_signal
+        buy_signal = score_series.sort_values(ascending=False).iloc[: self.topk].index
+
        for code in current_stock_list:
            if not trade_exchange.is_stock_tradable(stock_id=code, trade_date=trade_date):
                continue
@@ -285,12 +371,14 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
                if trade_exchange.check_order(sell_order):
                    sell_order_list.append(sell_order)
                    trade_val, trade_cost, trade_price = trade_exchange.deal_order(sell_order, position=current_temp)
+                    # update cash
+                    cash += trade_val - trade_cost
                    # sold
                    del self.stock_count[code]
                else:
                    # no buy signal, but the stock is kept
                    self.stock_count[code] += 1
-            elif code in buy:
+            elif code in buy_signal:
                # NOTE: This is different from the original version
                # get new buy signal
                # Only the stock fall in to topk will produce buy signal
@@ -300,7 +388,7 @@ class TopkDropoutStrategy(BaseStrategy, ListAdjustTimer):
        # buy new stock
        # note the current has been changed
        current_stock_list = current_temp.get_stock_list()
-        value = current_temp.get_cash() * self.risk_degree / len(buy) if len(buy) > 0 else 0
+        value = cash * self.risk_degree / len(buy) if len(buy) > 0 else 0

        # open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not consider it
        # as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -14,9 +14,11 @@ class Dataset(Serializable):

    def __init__(self, *args, **kwargs):
        """
-        init is designed to finish following steps
+        init is designed to finish following steps:
+
        - setup data
            - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
+
        - initialize the state of the dataset(info to prepare the data)
            - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.

@@ -29,11 +31,15 @@ class Dataset(Serializable):
        """
        setup the data

-        We split the setup_data function for following situation
-        - 1) User have a Dataset object with learned status on disk
-        - 2) User load the Dataset object from the disk(Note the init function is skiped)
-        - 3) User call `setup_data` to load new data
-        - 4) User prepare data for model based on previous status
+        We split the setup_data function for the following situation:
+
+        - User has a Dataset object with learned status on disk
+
+        - User loads the Dataset object from the disk (note that the init function is skipped)
+
+        - User calls `setup_data` to load new data
+
+        - User prepares data for model based on previous status
        """
        pass

@@ -41,8 +47,9 @@ class Dataset(Serializable):
        """
        The type of dataset depends on the model. (It could be pd.DataFrame, pytorch.DataLoader, etc.)
        The parameters should specify the scope for the prepared data
-        The method sould
+        The method should:
        - process the data
+
        - return the processed data

        Returns
@@ -55,11 +62,12 @@ class Dataset(Serializable):

 class DatasetH(Dataset):
    """
-    Dataset with Data(H)anler
+    Dataset with Data(H)andler

    User should try to put the data preprocessing functions into handler.
-    Only following data processing functions should be placed in Dataset
+    Only the following data processing functions should be placed in Dataset:
    - The processing is related to specific model.
+
    - The processing is related to data split
    """

@@ -81,21 +89,26 @@ class DatasetH(Dataset):
        Parameters
        ----------
        handler : Union[dict, DataHandler]
-            handler could be
-            1) insntance of `DataHandler`
-            2) config of `DataHandler`.  Please refer to `DataHandler`
+            handler could be:
+
+            - instance of `DataHandler`
+
+            - config of `DataHandler`.  Please refer to `DataHandler`
        segments : list
            Describe the options to segment the data.
-            Here are some examples
-            1) 'segments': {
-                    'train': ("2008-01-01", "2014-12-31"),
-                    'valid': ("2017-01-01", "2020-08-01",),
-                    'test': ("2015-01-01", "2016-12-31",),
-                }
-            2) 'segments': {
-                    'insample': ("2008-01-01", "2014-12-31"),
-                    'outsample': ("2017-01-01", "2020-08-01",),
-                }
+            Here are some examples:
+
+            .. code-block::
+
+                1) 'segments': {
+                        'train': ("2008-01-01", "2014-12-31"),
+                        'valid': ("2017-01-01", "2020-08-01",),
+                        'test': ("2015-01-01", "2016-12-31",),
+                    }
+                2) 'segments': {
+                        'insample': ("2008-01-01", "2014-12-31"),
+                        'outsample': ("2017-01-01", "2020-08-01",),
+                    }
        """
        self._handler = init_instance_by_config(handler, accept_types=DataHandler)
        self._segments = segments.copy()
@@ -114,9 +127,11 @@ class DatasetH(Dataset):
        ----------
        segments : Union[List[str], Tuple[str], str, slice]
            Describe the scope of the data to be prepared
-            Here are some examples
-            1) 'train'
-            2) ['train', 'valid']
+            Here are some examples:
+
+            - 'train'
+
+            - ['train', 'valid']
        col_set : str
            The col_set will be passed to self._handler when fetching data
        data_key: str
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -41,7 +41,7 @@ class DataHandler(Serializable):
    Example of the data:
    The multi-index of the columns is optional.

-    .. code-block::
+    .. code-block:: python

                                feature                                                            label
                                $close     $volume  Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
@@ -109,7 +109,8 @@ class DataHandler(Serializable):
        Parameters
        ----------
        enable_cache : bool
-            default value is false
+            default value is false:
+
            - if `enable_cache` == True:

                the processed data will be saved on disk, and handler will load the cached data from the disk directly
@@ -378,8 +379,10 @@ class DataHandlerLP(DataHandler):
        init_type : str
            The type `IT_*` listed above
        enable_cache : bool
-            default value is false
-            if `enable_cache` == True:
+            default value is false:
+
+            - if `enable_cache` == True:
+
                the processed data will be saved on disk, and handler will load the cached data from the disk directly
                when we call `init` next time
        """
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -39,14 +39,16 @@ class DataLoader(abc.ABC):
        pd.DataFrame:
            data load from the under layer source

-            Example of the data:
-            (The multi-index of the columns is optional.)
-                                    feature                                                             label
-                                    $close     $volume     Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
-            datetime    instrument
-            2010-01-04  SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
-                        SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
-                        SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
+            Example of the data (the multi-index of the columns is optional):
+
+            .. code-block::
+
+                                        feature                                                             label
+                                        $close     $volume     Ref($close, 1)  Mean($close, 3)  $high-$low  LABEL0
+                datetime    instrument
+                2010-01-04  SH600000    81.807068  17145150.0       83.737389        83.016739    2.741058  0.0032
+                            SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
+                            SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
        """
        pass

@@ -55,7 +57,7 @@ class DLWParser(DataLoader):
    """
    (D)ata(L)oader (W)ith (P)arser for features and names

-    Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields
+    Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields.
    """

    def __init__(self, config: Tuple[list, tuple, dict]):
@@ -65,14 +67,16 @@ class DLWParser(DataLoader):
        config : Tuple[list, tuple, dict]
            Config will be used to describe the fields and column names

-            <config> := {
-                "group_name1": <fields_info1>
-                "group_name2": <fields_info2>
-            }
-            or
-            <config> := <fields_info>
+            .. code-block:: YAML

-            <fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
+                <config> := {
+                    "group_name1": <fields_info1>
+                    "group_name2": <fields_info2>
+                }
+                or
+                <config> := <fields_info>
+
+                <fields_info> := ["expr", ...] | (["expr", ...], ["col_name", ...])
        """
        self.is_group = isinstance(config, dict)

--- a/scripts/README.md
+++ b/scripts/README.md
@@ -43,6 +43,8 @@ python get_data.py qlib_data --help

 ### US data

+> Need to download data first: [Download US Data](#Downlaod-US-Data)
+
 ```python
 import qlib
 from qlib.config import REG_US
@@ -52,6 +54,8 @@ qlib.init(provider_uri=provider_uri, region=REG_US)

 ### CN data

+> Need to download data first: [Download CN Data](#Download-CN-Data)
+
 ```python
 import qlib
 from qlib.config import REG_CN
--- a/scripts/dump_bin.py
+++ b/scripts/dump_bin.py
@@ -140,7 +140,7 @@ class DumpDataBase:

    def _get_source_data(self, file_path: Path) -> pd.DataFrame:
        df = pd.read_csv(str(file_path.resolve()), low_memory=False)
-        df[self.date_field_name] = df[self.date_field_name].astype(np.datetime64)
+        df[self.date_field_name] = df[self.date_field_name].astype(str).astype(np.datetime64)
        # df.drop_duplicates([self.date_field_name], inplace=True)
        return df

@@ -339,10 +339,10 @@ class DumpDataFix(DumpDataAll):
    def dump(self):
        self._calendars_list = self._read_calendars(self._calendars_dir.joinpath(f"{self.freq}.txt"))
        # noinspection PyAttributeOutsideInit
-        self._old_instruments = self._read_instruments(
-            self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME)
-        ).to_dict(
-            orient="index"
+        self._old_instruments = (
+            self._read_instruments(self._instruments_dir.joinpath(self.INSTRUMENTS_FILE_NAME))
+            .set_index([self.symbol_field_name])
+            .to_dict(orient="index")
        )  # type: dict
        self._dump_instruments()
        self._dump_features()