Merge branch 'main' of github.com:you-n-g/qlib into main

2026-07-21 11:17:34 +08:00 · 2020-11-19 09:18:24 +00:00
parent aa971e017a df406d58a5
commit 11dc307a96
29 changed files with 521 additions and 1394 deletions
--- a/examples/benchmarks/CatBoost/requirements.txt
+++ b/examples/benchmarks/CatBoost/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.1.2
+numpy==1.17.4
+catboost==0.24.3
--- a/examples/benchmarks/DNN/requirements.txt
+++ b/examples/benchmarks/DNN/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/DNN/workflow_config_dnn.yaml
+++ b/examples/benchmarks/DNN/workflow_config_dnn.yaml
@@ -0,0 +1,62 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DNNModelPytorch
+        module_path: qlib.contrib.model.pytorch_nn
+        kwargs:
+            input_dim: 360
+            output_dim: 1
+            layers: [256, 512, 1024, 512, 256, 128, 64]
+            lr: 0.001
+            max_steps: 300
+            batch_size: 2000
+            early_stop_rounds: 50
+            eval_steps: 20
+            lr_decay: 0.96
+            lr_decay_steps: 100
+            optimizer: gd
+            loss: mse
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GATs/requirements.txt
+++ b/examples/benchmarks/GATs/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/GATs/worflow_config_gats.yaml
+++ b/examples/benchmarks/GATs/worflow_config_gats.yaml
@@ -0,0 +1,63 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GAT
+        module_path: qlib.contrib.model.pytorch_gats
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            base_model: GRU
+            seed: 0
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GBDT/requirements.txt
+++ b/examples/benchmarks/GBDT/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.1.2
+numpy==1.17.4
+lightgbm==3.1.0
--- a/examples/benchmarks/GBDT/workflow_config_gbdt.yaml
+++ b/examples/benchmarks/GBDT/workflow_config_gbdt.yaml
@@ -0,0 +1,59 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GRU/requirements.txt
+++ b/examples/benchmarks/GRU/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/GRU/workflow_config_gru.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru.yaml
@@ -0,0 +1,62 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GRU
+        module_path: qlib.contrib.model.pytorch_gru
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            seed: 0
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LSTM/requirements.txt
+++ b/examples/benchmarks/LSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/LSTM/workflow_config_lstm.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm.yaml
@@ -0,0 +1,62 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LSTM
+        module_path: qlib.contrib.model.pytorch_lstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            seed: 0
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/XGBoost/requirements.txt
+++ b/examples/benchmarks/XGBoost/requirements.txt
@@ -0,0 +1,3 @@
+numpy==1.17.4
+pandas==1.1.2
+xgboost==1.2.1
--- a/examples/benchmarks/XGBoost/workflow_config_xgboost.yaml
+++ b/examples/benchmarks/XGBoost/workflow_config_xgboost.yaml
@@ -0,0 +1,62 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: XGBModel
+        module_path: qlib.contrib.model.xgboost
+        kwargs:
+            objective: reg:linear
+            n_estimators: 5000
+            colsample_bytree: 0.85
+            learning_rate: 0.0421
+            subsample: 0.8789
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+            missing: -1
+            min_child_weight: 1
+            nthread: 4
+            tree_method: hist
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/XGBoost/xgboost.py
+++ b/examples/benchmarks/XGBoost/xgboost.py
@@ -0,0 +1,111 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import xgboost as xgb
+
+from ...model.base import Model
+from ...data.dataset import DatasetH
+from ...data.dataset.handler import DataHandlerLP
+
+
+class XGBModel(Model):
+    """Gradient-boosted tree model trained through ``xgboost.train``.
+
+    Parameters
+    ----------
+    obj : str
+        ``"mse"`` or ``"binary"``; any other value raises ``NotImplementedError``.
+        Stored in the booster parameter dict under the key ``"obj"``.
+    **kwargs
+        Additional booster parameters, merged into the same parameter dict
+        that is later handed to ``xgboost.train``.
+    """
+
+    def __init__(self, obj="mse", **kwargs):
+        if obj not in {"mse", "binary"}:
+            raise NotImplementedError
+        # NOTE(review): xgboost documents this parameter as "objective"; confirm "obj" is intended.
+        self._params = {"obj": obj}
+        self._params.update(kwargs)
+        self.model = None  # set by fit(); predict() refuses to run while it is None
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        num_boost_round=1000,
+        early_stopping_rounds=50,
+        verbose_eval=20,
+        evals_result=None,
+        **kwargs
+    ):
+        """Train the booster on the "train" segment, evaluating on "train" and "valid".
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            Source of the "train" and "valid" segments; each must expose the
+            "feature" and "label" column sets.
+        num_boost_round, early_stopping_rounds, verbose_eval
+            Forwarded unchanged to ``xgboost.train``.
+        evals_result : dict, optional
+            Filled in place with the evaluation history. After training, the
+            "train" and "valid" entries are each replaced by the value list of
+            their first recorded metric. A new dict is created when omitted.
+        **kwargs
+            Extra keyword arguments forwarded to ``xgboost.train``.
+
+        Raises
+        ------
+        ValueError
+            If the training label is not a single column.
+        """
+        # A dict default would be created once and shared by every call; build it per call instead.
+        if evals_result is None:
+            evals_result = {}
+
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]
+
+        # XGBoost needs a 1D array as its label
+        if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
+            # Squeeze only the column axis so a single-row segment stays 1D instead of becoming 0-d.
+            y_train_1d = np.squeeze(y_train.values, axis=1)
+            y_valid_1d = np.squeeze(y_valid.values, axis=1)
+        else:
+            raise ValueError("XGBoost doesn't support multi-label training")
+
+        dtrain = xgb.DMatrix(x_train.values, label=y_train_1d)
+        dvalid = xgb.DMatrix(x_valid.values, label=y_valid_1d)
+        self.model = xgb.train(
+            self._params,
+            dtrain=dtrain,
+            num_boost_round=num_boost_round,
+            evals=[(dtrain, "train"), (dvalid, "valid")],
+            early_stopping_rounds=early_stopping_rounds,
+            verbose_eval=verbose_eval,
+            evals_result=evals_result,
+            **kwargs
+        )
+        # Keep only the first metric's history for each evaluation set.
+        evals_result["train"] = list(evals_result["train"].values())[0]
+        evals_result["valid"] = list(evals_result["valid"].values())[0]
+
+    def predict(self, dataset):
+        """Return predictions for the "test" segment as a Series indexed like its features.
+
+        Raises
+        ------
+        ValueError
+            If the model has not been fitted.
+        """
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        x_test = dataset.prepare("test", col_set="feature")
+        # Pass the 2D feature matrix as-is: squeezing would collapse a single-row or
+        # single-column test set to 1D.
+        return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)
--- a/examples/estimator/analyze_from_estimator.ipynb
+++ b/examples/estimator/analyze_from_estimator.ipynb
@@ -1,222 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "import json\n",
-    "import yaml\n",
-    "import pickle\n",
-    "from pathlib import Path\n",
-    "\n",
-    "import qlib\n",
-    "import pandas as pd\n",
-    "from qlib.config import REG_CN\n",
-    "from qlib.utils import exists_qlib_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "CUR_DIR = Path.cwd()\n",
-    "MARKET = \"csi300\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# use default data\n",
-    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data\n",
-    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
-    "if not exists_qlib_data(provider_uri):\n",
-    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
-    "    sys.path.append(str(CUR_DIR.parent.parent.joinpath(\"scripts\")))\n",
-    "    from get_data import GetData\n",
-    "    GetData().qlib_data(target_dir=provider_uri)\n",
-    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with CUR_DIR.joinpath('estimator_config.yaml').open() as fp:\n",
-    "    estimator_name = yaml.load(fp, Loader=yaml.FullLoader)['experiment']['name']\n",
-    "with CUR_DIR.joinpath(estimator_name, 'exp_info.json').open() as fp:\n",
-    "    latest_id = json.load(fp)['id']\n",
-    "    \n",
-    "estimator_dir = CUR_DIR.joinpath(estimator_name, 'sacred', latest_id)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# read estimator result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pred_df = pd.read_pickle(estimator_dir.joinpath('pred.pkl'))\n",
-    "report_normal_df = pd.read_pickle(estimator_dir.joinpath('report_normal.pkl'))\n",
-    "report_normal_df.index.names = ['index']\n",
-    "\n",
-    "analysis_df = pd.read_pickle(estimator_dir.joinpath('analysis.pkl'))\n",
-    "positions = pickle.load(estimator_dir.joinpath('positions.pkl').open('rb'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# analyze graphs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from qlib.data import D\n",
-    "from qlib.contrib.report import analysis_model, analysis_position\n",
-    "pred_df_dates = pred_df.index.get_level_values(level='datetime')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## analysis position"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
-    "stock_ret.columns = ['label']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_position.report_graph(report_normal_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### risk analysis"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## analysis model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
-    "label_df.columns = ['label']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### score IC"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
-    "analysis_position.score_ic_graph(pred_label)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### model performance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_model.model_performance_graph(pred_label)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
--- a/examples/estimator/estimator_config.yaml
+++ b/examples/estimator/estimator_config.yaml
@@ -1,53 +0,0 @@
-experiment:
-  name: estimator_example
-  observer_type: file_storage
-  mode: train
-
-model:
-  class: LGBModel
-  module_path: qlib.gbdt.model.gbdt
-  args:
-    loss: mse
-    colsample_bytree: 0.8879
-    learning_rate: 0.0421
-    subsample: 0.8789
-    lambda_l1: 205.6999
-    lambda_l2: 580.9768
-    max_depth: 8
-    num_leaves: 210
-    num_threads: 20
-data:
-  class: Alpha158
-  args:
-    dropna_label: True
-  filter:
-    market: csi300
-trainer:
-  class: StaticTrainer
-  args:
-    train_start_date: 2008-01-01
-    train_end_date: 2014-12-31
-    validate_start_date: 2015-01-01
-    validate_end_date: 2016-12-31
-    test_start_date: 2017-01-01
-    test_end_date: 2020-08-01
-strategy:
-  class: TopkDropoutStrategy
-  args:
-    topk: 50
-    n_drop: 5
-backtest:
-  normal_backtest_args:
-    verbose: False
-    limit_threshold: 0.095
-    account: 100000000
-    benchmark: SH000300
-    deal_price: close
-    open_cost: 0.0005
-    close_cost: 0.0015
-    min_cost: 5
-
-qlib_data:
-  # when testing, please modify the following parameters according to the specific environment
-  provider_uri: "~/.qlib/qlib_data/cn_data"
-  region: "cn"
--- a/examples/estimator/estimator_config_dnn.yaml
+++ b/examples/estimator/estimator_config_dnn.yaml
@@ -1,55 +0,0 @@
-experiment:
-  name: estimator_example
-  observer_type: file_storage
-  mode: train
-
-model:
-    module_path: qlib.model.pytorch_nn
-    class: DNNModelPytorch
-    args:
-        loss: mse
-        input_dim: 158
-        output_dim: 1
-        lr: 0.002
-        lr_decay: 0.96
-        lr_decay_steps: 100
-        optimizer: 'adam'
-        max_steps: 8000
-        batch_size: 4096
-        GPU: '0'
-data:
-  class: Alpha158
-  args:
-    dropna_label: True
-    dropna_feature: True
-  filter:
-    market: csi300
-trainer:
-  class: StaticTrainer
-  args:
-    train_start_date: 2007-01-01
-    train_end_date: 2014-12-31
-    validate_start_date: 2015-01-01
-    validate_end_date: 2016-12-31
-    test_start_date: 2017-01-01
-    test_end_date: 2020-08-01
-strategy:
-  class: TopkDropoutStrategy
-  args:
-    topk: 50
-    n_drop: 5
-backtest:
-  normal_backtest_args:
-    verbose: False
-    limit_threshold: 0.095
-    account: 100000000
-    benchmark: SH000300
-    deal_price: close
-    open_cost: 0.0005
-    close_cost: 0.0015
-    min_cost: 5
-
-qlib_data:
-  # when testing, please modify the following parameters according to the specific environment
-  provider_uri: "~/.qlib/qlib_data/cn_data"
-  region: "cn"
--- a/examples/train_and_backtest.py
+++ b/examples/train_and_backtest.py
@@ -1,121 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import sys
-from pathlib import Path
-
-import qlib
-import pandas as pd
-from qlib.config import REG_CN
-from qlib.contrib.model.gbdt import LGBModel
-from qlib.contrib.data.handler import Alpha158
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data
-
-
-if __name__ == "__main__":
-
-    # use default data
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data(target_dir=provider_uri)
-
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
-
-    MARKET = "CSI300"
-    BENCHMARK = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    DATA_HANDLER_CONFIG = {
-        "dropna_label": True,
-        "start_date": "2008-01-01",
-        "end_date": "2020-08-01",
-        "market": MARKET,
-    }
-
-    TRAINER_CONFIG = {
-        "train_start_date": "2008-01-01",
-        "train_end_date": "2014-12-31",
-        "validate_start_date": "2015-01-01",
-        "validate_end_date": "2016-12-31",
-        "test_start_date": "2017-01-01",
-        "test_end_date": "2020-08-01",
-    }
-
-    # use default DataHandler
-    # custom DataHandler, refer to: TODO: DataHandler API url
-    x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
-        **TRAINER_CONFIG
-    )
-
-    MODEL_CONFIG = {
-        "loss": "mse",
-        "colsample_bytree": 0.8879,
-        "learning_rate": 0.0421,
-        "subsample": 0.8789,
-        "lambda_l1": 205.6999,
-        "lambda_l2": 580.9768,
-        "max_depth": 8,
-        "num_leaves": 210,
-        "num_threads": 20,
-    }
-    # use default model
-    # custom Model, refer to: TODO: Model API url
-    model = LGBModel(**MODEL_CONFIG)
-    model.fit(x_train, y_train, x_validate, y_validate)
-    _pred = model.predict(x_test)
-    _pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)
-
-    # backtest requires pred_score
-    pred_score = pd.DataFrame(index=_pred.index)
-    pred_score["score"] = _pred.iloc(axis=1)[0]
-
-    # save pred_score to file
-    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
-    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
-    pred_score.to_pickle(pred_score_path)
-
-    ###################################
-    # backtest
-    ###################################
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    }
-
-    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
-    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
-
-    ###################################
-    # analyze
-    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
-    ###################################
-    analysis = dict()
-    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
-    analysis["excess_return_with_cost"] = risk_analysis(
-        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
-    )
-    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
--- a/examples/train_backtest_analyze.ipynb
+++ b/examples/train_backtest_analyze.ipynb
@@ -1,338 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "from pathlib import Path\n",
-    "\n",
-    "import qlib\n",
-    "import pandas as pd\n",
-    "from qlib.config import REG_CN\n",
-    "from qlib.contrib.model.gbdt import LGBModel\n",
-    "from qlib.contrib.estimator.handler import Alpha158\n",
-    "from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
-    "from qlib.contrib.evaluate import (\n",
-    "    backtest as normal_backtest,\n",
-    "    risk_analysis,\n",
-    ")\n",
-    "from qlib.utils import exists_qlib_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# use default data\n",
-    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn\n",
-    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
-    "if not exists_qlib_data(provider_uri):\n",
-    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
-    "    sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
-    "    from get_data import GetData\n",
-    "    GetData().qlib_data(target_dir=provider_uri)\n",
-    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "MARKET = \"csi300\"\n",
-    "BENCHMARK = \"SH000300\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# train model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "###################################\n",
-    "# train model\n",
-    "###################################\n",
-    "DATA_HANDLER_CONFIG = {\n",
-    "    \"dropna_label\": True,\n",
-    "    \"start_date\": \"2008-01-01\",\n",
-    "    \"end_date\": \"2020-08-01\",\n",
-    "    \"market\": MARKET,\n",
-    "}\n",
-    "\n",
-    "TRAINER_CONFIG = {\n",
-    "    \"train_start_date\": \"2008-01-01\",\n",
-    "    \"train_end_date\": \"2014-12-31\",\n",
-    "    \"validate_start_date\": \"2015-01-01\",\n",
-    "    \"validate_end_date\": \"2016-12-31\",\n",
-    "    \"test_start_date\": \"2017-01-01\",\n",
-    "    \"test_end_date\": \"2020-08-01\",\n",
-    "}\n",
-    "\n",
-    "# use default DataHandler\n",
-    "# custom DataHandler, refer to: TODO: DataHandler api url\n",
-    "x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(**TRAINER_CONFIG)\n",
-    "\n",
-    "\n",
-    "MODEL_CONFIG = {\n",
-    "    \"loss\": \"mse\",\n",
-    "    \"colsample_bytree\": 0.8879,\n",
-    "    \"learning_rate\": 0.0421,\n",
-    "    \"subsample\": 0.8789,\n",
-    "    \"lambda_l1\": 205.6999,\n",
-    "    \"lambda_l2\": 580.9768,\n",
-    "    \"max_depth\": 8,\n",
-    "    \"num_leaves\": 210,\n",
-    "    \"num_threads\": 20,\n",
-    "}\n",
-    "# use default model\n",
-    "# custom Model, refer to: TODO: Model api url\n",
-    "model = LGBModel(**MODEL_CONFIG)\n",
-    "model.fit(x_train, y_train, x_validate, y_validate)\n",
-    "_pred = model.predict(x_test)\n",
-    "_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)\n",
-    "\n",
-    "# backtest requires pred_score\n",
-    "pred_score = pd.DataFrame(index=_pred.index)\n",
-    "pred_score[\"score\"] = _pred.iloc(axis=1)[0]\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# backtest"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "###################################\n",
-    "# backtest\n",
-    "###################################\n",
-    "STRATEGY_CONFIG = {\n",
-    "    \"topk\": 50,\n",
-    "    \"n_drop\": 5}\n",
-    "BACKTEST_CONFIG = {\n",
-    "    \"verbose\": False,\n",
-    "    \"limit_threshold\": 0.095,\n",
-    "    \"account\": 100000000,\n",
-    "    \"benchmark\": BENCHMARK,\n",
-    "    \"deal_price\": \"close\",\n",
-    "    \"open_cost\": 0.0005,\n",
-    "    \"close_cost\": 0.0015,\n",
-    "    \"min_cost\": 5,\n",
-    "    \n",
-    "}\n",
-    "\n",
-    "# use default strategy\n",
-    "# custom Strategy, refer to: TODO: Strategy api url\n",
-    "strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)\n",
-    "report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# analyze"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "###################################\n",
-    "# analyze\n",
-    "# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb\n",
-    "###################################\n",
-    "analysis = dict()\n",
-    "analysis[\"excess_return_without_cost\"] = risk_analysis(report_normal[\"return\"] - report_normal[\"bench\"])\n",
-    "analysis[\"excess_return_with_cost\"] = risk_analysis(\n",
-    "    report_normal[\"return\"] - report_normal[\"bench\"] - report_normal[\"cost\"]\n",
-    ")\n",
-    "analysis_df = pd.concat(analysis)  # type: pd.DataFrame\n",
-    "print(analysis_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# analyze graphs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from qlib.contrib.report import analysis_model, analysis_position\n",
-    "from qlib.data import D\n",
-    "pred_df_dates = pred_score.index.get_level_values(level='datetime')\n",
-    "report_normal_df = report_normal\n",
-    "positions = positions_normal\n",
-    "pred_df = pred_score"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## analysis position"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
-    "stock_ret.columns = ['label']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_position.report_graph(report_normal_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### risk analysis"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## analysis model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
-    "label_df.columns = ['label']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### score IC"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
-    "analysis_position.score_ic_graph(pred_label)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### model performance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "analysis_model.model_performance_graph(pred_label)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": true,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {},
-   "toc_section_display": true,
-   "toc_window_display": false
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
--- a/examples/workflow_by_code_gats.py
+++ b/examples/workflow_by_code_gats.py
@@ -1,145 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import sys
-from pathlib import Path
-
-import qlib
-import pandas as pd
-from qlib.config import REG_CN
-from qlib.contrib.model.pytorch_gats import GAT
-from qlib.contrib.data.handler import ALPHA360_Denoise
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data
-
-# from qlib.model.learner import train_model
-from qlib.utils import init_instance_by_config
-
-import pickle
-
-if __name__ == "__main__":
-
-    # use default data
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data_cn(target_dir=provider_uri)
-
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
-
-    MARKET = "csi300"
-    BENCHMARK = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    DATA_HANDLER_CONFIG = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": MARKET,
-    }
-
-    TRAINER_CONFIG = {
-        "train_start_time": "2008-01-01",
-        "train_end_time": "2014-12-31",
-        "validate_start_time": "2015-01-01",
-        "validate_end_time": "2016-12-31",
-        "test_start_time": "2017-01-01",
-        "test_end_time": "2020-08-01",
-    }
-
-    task = {
-        "model": {
-            "class": "GAT",
-            "module_path": "qlib.contrib.model.pytorch_gats",
-            "kwargs": {
-                "d_feat": 6,
-                "hidden_size": 64,
-                "num_layers": 2,
-                "dropout": 0.0,
-                "n_epochs": 200,
-                "lr": 1e-3,
-                "early_stop": 20,
-                "batch_size": 800,
-                "metric": "IC",
-                "loss": "mse",
-                "base_model":"GRU",
-                "seed": 0,
-                "GPU": 0,
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "ALPHA360_Denoise",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": DATA_HANDLER_CONFIG,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        }
-        # You shoud record the data in specific sequence
-        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
-    }
-
-    # model = train_model(task)
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
-    model.fit(dataset)
-
-    pred_score = model.predict(dataset)
-
-    # save pred_score to file
-    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
-    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
-    pred_score.to_pickle(pred_score_path)
-
-    ###################################
-    # backtest
-    ###################################
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    }
-
-    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
-    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
-
-    ###################################
-    # analyze
-    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
-    ###################################
-    analysis = dict()
-    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
-    analysis["excess_return_with_cost"] = risk_analysis(
-        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
-    )
-    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
--- a/examples/workflow_by_code_gru.py
+++ b/examples/workflow_by_code_gru.py
@@ -1,144 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import sys
-from pathlib import Path
-
-import qlib
-import pandas as pd
-from qlib.config import REG_CN
-from qlib.contrib.model.pytorch_gru import GRU
-from qlib.contrib.data.handler import ALPHA360_Denoise
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data
-
-# from qlib.model.learner import train_model
-from qlib.utils import init_instance_by_config
-
-import pickle
-
-if __name__ == "__main__":
-
-    # use default data
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data_cn(target_dir=provider_uri)
-
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
-
-    MARKET = "csi300"
-    BENCHMARK = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    DATA_HANDLER_CONFIG = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": MARKET,
-    }
-
-    TRAINER_CONFIG = {
-        "train_start_time": "2008-01-01",
-        "train_end_time": "2014-12-31",
-        "validate_start_time": "2015-01-01",
-        "validate_end_time": "2016-12-31",
-        "test_start_time": "2017-01-01",
-        "test_end_time": "2020-08-01",
-    }
-
-    task = {
-        "model": {
-            "class": "GRU",
-            "module_path": "qlib.contrib.model.pytorch_gru",
-            "kwargs": {
-                "d_feat": 6,
-                "hidden_size": 64,
-                "num_layers": 2,
-                "dropout": 0.0,
-                "n_epochs": 200,
-                "lr": 1e-3,
-                "early_stop": 20,
-                "batch_size": 800,
-                "metric": "IC",
-                "loss": "mse",
-                "seed": 0,
-                "GPU": 0,
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "ALPHA360_Denoise",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": DATA_HANDLER_CONFIG,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        }
-        # You shoud record the data in specific sequence
-        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
-    }
-
-    # model = train_model(task)
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
-    model.fit(dataset)
-
-    pred_score = model.predict(dataset)
-
-    # save pred_score to file
-    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
-    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
-    pred_score.to_pickle(pred_score_path)
-
-    ###################################
-    # backtest
-    ###################################
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    }
-
-    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
-    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
-
-    ###################################
-    # analyze
-    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
-    ###################################
-    analysis = dict()
-    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
-    analysis["excess_return_with_cost"] = risk_analysis(
-        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
-    )
-    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
--- a/examples/workflow_by_code_lstm.py
+++ b/examples/workflow_by_code_lstm.py
@@ -1,144 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import sys
-from pathlib import Path
-
-import qlib
-import pandas as pd
-from qlib.config import REG_CN
-from qlib.contrib.model.pytorch_lstm import LSTM
-from qlib.contrib.data.handler import ALPHA360_Denoise
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data
-
-# from qlib.model.learner import train_model
-from qlib.utils import init_instance_by_config
-
-import pickle
-
-if __name__ == "__main__":
-
-    # use default data
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data_cn(target_dir=provider_uri)
-
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
-
-    MARKET = "csi300"
-    BENCHMARK = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    DATA_HANDLER_CONFIG = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": MARKET,
-    }
-
-    TRAINER_CONFIG = {
-        "train_start_time": "2008-01-01",
-        "train_end_time": "2014-12-31",
-        "validate_start_time": "2015-01-01",
-        "validate_end_time": "2016-12-31",
-        "test_start_time": "2017-01-01",
-        "test_end_time": "2020-08-01",
-    }
-
-    task = {
-        "model": {
-            "class": "LSTM",
-            "module_path": "qlib.contrib.model.pytorch_lstm",
-            "kwargs": {
-                "d_feat": 6,
-                "hidden_size": 64,
-                "num_layers": 2,
-                "dropout": 0.0,
-                "n_epochs": 200,
-                "lr": 1e-3,
-                "early_stop": 20,
-                "batch_size": 800,
-                "metric": "IC",
-                "loss": "mse",
-                "seed": 0,
-                "GPU": 0,
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "ALPHA360_Denoise",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": DATA_HANDLER_CONFIG,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        }
-        # You shoud record the data in specific sequence
-        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
-    }
-
-    # model = train_model(task)
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
-    model.fit(dataset)
-
-    pred_score = model.predict(dataset)
-
-    # save pred_score to file
-    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
-    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
-    pred_score.to_pickle(pred_score_path)
-
-    ###################################
-    # backtest
-    ###################################
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    }
-
-    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
-    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
-
-    ###################################
-    # analyze
-    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
-    ###################################
-    analysis = dict()
-    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
-    analysis["excess_return_with_cost"] = risk_analysis(
-        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
-    )
-    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
--- a/examples/workflow_by_code_xgboost.py
+++ b/examples/workflow_by_code_xgboost.py
@@ -1,142 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import sys
-from pathlib import Path
-
-import qlib
-import pandas as pd
-from qlib.config import REG_CN
-from qlib.contrib.model.xgboost import XGBModel
-from qlib.contrib.data.handler import Alpha158
-from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-from qlib.contrib.evaluate import (
-    backtest as normal_backtest,
-    risk_analysis,
-)
-from qlib.utils import exists_qlib_data
-
-# from qlib.model.learner import train_model
-from qlib.utils import init_instance_by_config
-
-if __name__ == "__main__":
-
-    # use default data
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
-        from get_data import GetData
-
-        GetData().qlib_data_cn(target_dir=provider_uri)
-
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
-
-    MARKET = "csi300"
-    BENCHMARK = "SH000300"
-
-    ###################################
-    # train model
-    ###################################
-    DATA_HANDLER_CONFIG = {
-        "start_time": "2008-01-01",
-        "end_time": "2020-08-01",
-        "fit_start_time": "2008-01-01",
-        "fit_end_time": "2014-12-31",
-        "instruments": MARKET,
-    }
-
-    TRAINER_CONFIG = {
-        "train_start_time": "2008-01-01",
-        "train_end_time": "2014-12-31",
-        "validate_start_time": "2015-01-01",
-        "validate_end_time": "2016-12-31",
-        "test_start_time": "2017-01-01",
-        "test_end_time": "2020-08-01",
-    }
-
-    task = {
-        "model": {
-            "class": "XGBModel",
-            "module_path": "qlib.contrib.model.xgboost",
-            "kwargs": {
-                "objective": "reg:linear",
-                "n_estimators": 5000,
-                "colsample_bytree": 0.85,
-                "learning_rate": 0.0421,
-                "subsample": 0.8789,
-                "max_depth": 8,
-                "num_leaves": 210,
-                "num_threads": 20,
-                "missing": -1,
-                "min_child_weight": 1,
-                "nthread": 4,
-                "tree_method": "hist",
-            },
-        },
-        "dataset": {
-            "class": "DatasetH",
-            "module_path": "qlib.data.dataset",
-            "kwargs": {
-                "handler": {
-                    "class": "Alpha158",
-                    "module_path": "qlib.contrib.data.handler",
-                    "kwargs": DATA_HANDLER_CONFIG,
-                },
-                "segments": {
-                    "train": ("2008-01-01", "2014-12-31"),
-                    "valid": ("2015-01-01", "2016-12-31"),
-                    "test": ("2017-01-01", "2020-08-01"),
-                },
-            },
-        }
-        # You shoud record the data in specific sequence
-        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
-    }
-
-    # model = train_model(task)
-    model = init_instance_by_config(task["model"])
-    dataset = init_instance_by_config(task["dataset"])
-
-    model.fit(dataset)
-    pred_score = model.predict(dataset)
-
-    # save pred_score to file
-    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()
-    pred_score_path.parent.mkdir(exist_ok=True, parents=True)
-    pred_score.to_pickle(pred_score_path)
-
-    ###################################
-    # backtest
-    ###################################
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "verbose": False,
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-    }
-
-    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
-    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)
-
-    ###################################
-    # analyze
-    # If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb
-    ###################################
-    analysis = dict()
-    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
-    analysis["excess_return_with_cost"] = risk_analysis(
-        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
-    )
-    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -121,7 +121,11 @@ class GAT(Model):
        self._scorer = mean_squared_error if loss == "mse" else roc_auc_score

        self.GAT_model = GATModel(
-            d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, base_model=self.base_model
+            d_feat=self.d_feat,
+            hidden_size=self.hidden_size,
+            num_layers=self.num_layers,
+            dropout=self.dropout,
+            base_model=self.base_model,
        )
        if optimizer.lower() == "adam":
            self.train_optimizer = optim.Adam(self.GAT_model.parameters(), lr=self.lr)
@@ -321,11 +325,10 @@ class GAT(Model):


 class GATModel(nn.Module):
-    
-    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model='GRU'):
+    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
        super().__init__()

-        if base_model == 'GRU':
+        if base_model == "GRU":
            self.rnn = nn.GRU(
                input_size=d_feat,
                hidden_size=hidden_size,
@@ -333,7 +336,7 @@ class GATModel(nn.Module):
                batch_first=True,
                dropout=dropout,
            )
-        elif base_model == 'LSTM':
+        elif base_model == "LSTM":
            self.rnn = nn.LSTM(
                input_size=d_feat,
                hidden_size=hidden_size,
@@ -342,7 +345,7 @@ class GATModel(nn.Module):
                dropout=dropout,
            )
        else:
-            raise ValueError('unknown base model name `%s`'%base_model) 
+            raise ValueError("unknown base model name `%s`" % base_model)

        self.hidden_size = hidden_size
        self.bn1 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
@@ -354,19 +357,19 @@ class GATModel(nn.Module):

        self.d_feat = d_feat

-    def cal_convariance(self, x, y): # the 2nd dimension of x and y are the same
-        e_x = torch.mean(x, dim = 1).reshape(-1, 1)
-        e_y = torch.mean(y, dim = 1).reshape(-1, 1)
+    def cal_convariance(self, x, y):  # the 2nd dimension of x and y are the same
+        e_x = torch.mean(x, dim=1).reshape(-1, 1)
+        e_y = torch.mean(y, dim=1).reshape(-1, 1)
        e_x_e_y = e_x.mm(torch.t(e_y))
        x_extend = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
        y_extend = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
-        e_xy = torch.mean(x_extend*y_extend, dim = 2)
+        e_xy = torch.mean(x_extend * y_extend, dim=2)
        return e_xy - e_x_e_y

    def forward(self, x):
        # x: [N, F*T]
-        x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
-        x = x.permute(0, 2, 1) # [N, T, F]
+        x = x.reshape(len(x), self.d_feat, -1)  # [N, F, T]
+        x = x.permute(0, 2, 1)  # [N, T, F]
        out, _ = self.rnn(x)
        hidden = out[:, -1, :]
        hidden = self.bn1(hidden)
@@ -380,4 +383,4 @@ class GATModel(nn.Module):
        output = self.fc(output)
        output = self.bn2(output)
        output = self.leaky_relu(output)
-        return self.fc_out(output).squeeze()
+        return self.fc_out(output).squeeze()
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -317,7 +317,6 @@ class LSTM(Model):


 class LSTMModel(nn.Module):
-
    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0):
        super().__init__()

@@ -334,7 +333,7 @@ class LSTMModel(nn.Module):

    def forward(self, x):
        # x: [N, F*T]
-        x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
-        x = x.permute(0, 2, 1) # [N, T, F]
+        x = x.reshape(len(x), self.d_feat, -1)  # [N, F, T]
+        x = x.permute(0, 2, 1)  # [N, T, F]
        out, _ = self.rnn(x)
-        return self.fc_out(out[:, -1, :]).squeeze()
+        return self.fc_out(out[:, -1, :]).squeeze()
--- a/qlib/contrib/report/analysis_position/report.py
+++ b/qlib/contrib/report/analysis_position/report.py
@@ -75,11 +75,12 @@ def _report_figure(df: pd.DataFrame) -> [list, tuple]:
    max_start_date, max_end_date = _calculate_maximum(report_df)
    ex_max_start_date, ex_max_end_date = _calculate_maximum(report_df, True)

+    index_name = report_df.index.name
    _temp_df = report_df.reset_index()
    _temp_df.loc[-1] = 0
    _temp_df = _temp_df.shift(1)
-    _temp_df.loc[0, "index"] = "T0"
-    _temp_df.set_index("index", inplace=True)
+    _temp_df.loc[0, index_name] = "T0"
+    _temp_df.set_index(index_name, inplace=True)
    _temp_df.iloc[0] = 0
    report_df = _temp_df

--- a/qlib/contrib/report/graph.py
+++ b/qlib/contrib/report/graph.py
@@ -11,7 +11,7 @@ import pandas as pd
 import plotly.offline as py
 import plotly.graph_objs as go

-from plotly.tools import make_subplots
+from plotly.subplots import make_subplots
 from plotly.figure_factory import create_distplot

 from ...utils import get_module_by_module_path
@@ -357,7 +357,7 @@ class SubplotsGraph(object):
            #     _item.pop('yaxis', None)

            for _g_obj in _graph_data:
-                self._figure.append_trace(_g_obj, row=row, col=col)
+                self._figure.add_trace(_g_obj, row=row, col=col)

        if self._sub_graph_layout is not None:
            for k, v in self._sub_graph_layout.items():
--- a/qlib/workflow/cli.py
+++ b/qlib/workflow/cli.py
@@ -6,8 +6,8 @@ from pathlib import Path

 import qlib
 import fire
-import yaml
 import pandas as pd
+import ruamel.yaml as yaml
 from qlib.config import REG_CN
 from qlib.utils import init_instance_by_config
 from qlib.workflow import R
@@ -16,7 +16,7 @@ from qlib.workflow.record_temp import SignalRecord
 # worflow handler function
 def workflow(config_path):
    with open(config_path) as fp:
-        config = yaml.load(fp, Loader=yaml.FullLoader)
+        config = yaml.load(fp, Loader=yaml.Loader)

    provider_uri = config.get("provider_uri")
    qlib.init(provider_uri=provider_uri, region=REG_CN)
@@ -26,7 +26,8 @@ def workflow(config_path):
    dataset = init_instance_by_config(config.get("task")["dataset"])

    # start exp
-    with R.start("workflow"):
+    with R.start(experiment_name="workflow"):
+        R.log_paramters(**flatten_dict(task))
        model.fit(dataset)
        recorder = R.get_recorder()

--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -1,6 +1,7 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.

+import re
 import pandas as pd
 from pathlib import Path
 from pprint import pprint
@@ -37,12 +38,14 @@ class RecordTemp:
        """
        raise NotImplementedError(f"Please implement the `generate` method.")

-    def load(self, **kwargs):
+    def load(self, name, **kwargs):
        """
        Load the stored records.

        Parameters
        ----------
+        name : str
+            the name of the file to be loaded.
        kwargs

        Return
@@ -51,6 +54,16 @@ class RecordTemp:
        """
        raise NotImplementedError(f"Please implement the `load` method.")

+    def list(self):
+        """
+        List the stored records.
+
+        Return
+        ------
+        A list of all the stored records.
+        """
+        raise NotImplementedError(f"Please implement the `list` method.")
+
    def check(self, **kwargs):
        """
        Check if the records is properly generated and saved.
@@ -81,6 +94,8 @@ class SignalRecord(RecordTemp):
    def generate(self, **kwargs):
        # generate prediciton
        pred = self.model.predict(self.dataset)
+        if isinstance(pred, pd.Series):
+            pred = pred.to_frame("score")
        self.recorder.save_objects(**{"pred.pkl": pred})
        logger.info(
            f"Signal record 'pred.pkl' has been saved as the artifact of the Experiment {self.recorder.experiment_id}"
@@ -89,11 +104,14 @@ class SignalRecord(RecordTemp):
        pprint(f"The following are prediction results of the {type(self.model).__name__} model.")
        pprint(pred.head(5))

-    def load(self):
+    def load(self, name="pred.pkl"):
        # try to load the saved object
-        pred = self.recorder.load_object("pred.pkl")
+        pred = self.recorder.load_object(name)
        return pred

+    def list(self):
+        return ["pred.pkl"]
+
    def check(self, **kwargs):
        artifacts = self.recorder.list_artifacts()
        for artifact in artifacts:
@@ -165,10 +183,20 @@ class PortAnaRecord(SignalRecord):
        pprint("The following are analysis results of the excess return with cost.")
        pprint(analysis["excess_return_with_cost"])

-    def load(self):
+    def load(self, name):
        # try to load the saved object
-        pred = self.recorder.load_object(self.artifact_path / "port_analysis.pkl")
-        return pred
+        if self.artifact_path not in name:
+            file_name = re.split(r" |/|\\", name)[-1]
+            name = f"{self.artifact_path}/{file_name}"
+        result = self.recorder.load_object(name)
+        return result
+
+    def list(self):
+        return [
+            f"{self.artifact_path}/report_normal.pkl",
+            f"{self.artifact_path}/positions_normal.pkl",
+            f"{self.artifact_path}/port_analysis.pkl",
+        ]

    def check(self):
        artifacts = self.recorder.list_artifacts(self.artifact_path)