Update benchmark based on new backtest (#634)

* free random seed * update model baselines * more robust for parameters
2026-07-21 11:17:34 +08:00 · 2021-10-07 22:57:19 +08:00
parent 8c8d1336de
commit e99224e5c2
12 changed files with 229 additions and 199 deletions
--- a/README.md
+++ b/README.md
@@ -306,7 +306,7 @@ All the models listed above are runnable with ``Qlib``. Users can find the confi
 - Users can use the tool `qrun` mentioned above to run a model's workflow based from a config file.
 - Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.

- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found  in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
+- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py run --models=lightgbm`, where the `--models` argument can take any number of models listed above (the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
    - **NOTE**: Each baseline has different environment dependencies, please make sure that your python version aligns with the requirements(e.g. TFT only supports Python 3.6~3.7 due to the limitation of `tensorflow==1.15.0`)

 ## Run multiple models
@@ -316,7 +316,7 @@ The script will create a unique virtual environment for each model, and delete t

 Here is an example of running all the models for 10 iterations:
 ```python
-python run_all_model.py 10
+python run_all_model.py run 10
 ```

 It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). 
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -8,44 +8,48 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 >
 > In the new version of qlib, the default dataset is **v2**. Since the data is collected from the YahooFinance API (which is not very stable), the results of *v2* and *v1* may differ

-## Alpha360 dataset
-| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
-|---|---|---|---|---|---|---|---|---|
-| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 |
-| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 |
-| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 |
-| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 |
-| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 |
-| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 |
-| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 |
-| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
-| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
-| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
-| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
-| TCTS (Xueqing Wu, et al.)| Alpha360 | 0.0485±0.00 | 0.3689±0.04| 0.0586±0.00 | 0.4669±0.02 | 0.0816±0.02 | 1.1572±0.30| -0.0689±0.02 |
-| Transformer (Ashish Vaswani, et al.)| Alpha360 | 0.0141±0.00 | 0.0917±0.02| 0.0331±0.00 | 0.2357±0.03 | -0.0259±0.03 | -0.3323±0.43| -0.1763±0.07 |
-| Localformer (Juyong Jiang, et al.)| Alpha360 | 0.0408±0.00 | 0.2988±0.03| 0.0538±0.00 | 0.4105±0.02 | 0.0275±0.03 | 0.3464±0.37| -0.1182±0.03 |
-| TRA (Hengxu Lin, et al.)| Alpha360 | 0.0491±0.01 | 0.3868±0.06 | 0.0589±0.00 | 0.4802±0.04 | 0.0898±0.02 | 1.2490±0.32 | -0.0778±0.02 |
-
 ## Alpha158 dataset
-| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
-|---|---|---|---|---|---|---|---|---|
-| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 |
-| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 |
-| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
-| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
-| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
-| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
-| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
-| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
-| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
-| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
-| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
-| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
-| Transformer (Ashish Vaswani, et al.)| Alpha158 | 0.0274±0.00 | 0.2166±0.04| 0.0409±0.00 | 0.3342±0.04 | 0.0204±0.03 | 0.2888±0.40| -0.1216±0.04 |
-| Localformer (Juyong Jiang, et al.)| Alpha158 | 0.0355±0.00 | 0.2747±0.04| 0.0466±0.00 | 0.3762±0.03 | 0.0506±0.02 | 0.7447±0.34| -0.0875±0.02 |
-| TRA (Hengxu Lin, et al.)| Alpha158 (with selected 20 features)| 0.0409±0.00 | 0.3253±0.04 | 0.0488±0.00 | 0.4045±0.02 | 0.0673±0.02 | 1.0389±0.39 | -0.0830±0.02 |
-| TRA (Hengxu Lin, et al.)| Alpha158 | 0.0442±0.00 | 0.3426±0.03 | 0.0555±0.00 | 0.4395±0.03 | 0.0833±0.03 | 1.2064±0.36 | -0.0849±0.02 |
+
+| Model Name                               | Dataset                             | IC          | ICIR        | Rank IC     | Rank ICIR   | Annualized Return | Information Ratio | Max Drawdown |
+|------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
+| TabNet(Sercan O. Arik, et al.)           | Alpha158                            | 0.0204±0.01 | 0.1554±0.07 | 0.0333±0.00 | 0.2552±0.05 | 0.0227±0.04       | 0.3676±0.54       | -0.1089±0.08 |
+| Transformer(Ashish Vaswani, et al.)      | Alpha158                            | 0.0264±0.00 | 0.2053±0.02 | 0.0407±0.00 | 0.3273±0.02 | 0.0273±0.02       | 0.3970±0.26       | -0.1101±0.02 |
+| GRU(Kyunghyun Cho, et al.)               | Alpha158(with selected 20 features) | 0.0315±0.00 | 0.2450±0.04 | 0.0428±0.00 | 0.3440±0.03 | 0.0344±0.02       | 0.5160±0.25       | -0.1017±0.02 |
+| LSTM(Sepp Hochreiter, et al.)            | Alpha158(with selected 20 features) | 0.0318±0.00 | 0.2367±0.04 | 0.0435±0.00 | 0.3389±0.03 | 0.0381±0.03       | 0.5561±0.46       | -0.1207±0.04 |
+| Localformer(Juyong Jiang, et al.)        | Alpha158                            | 0.0356±0.00 | 0.2756±0.03 | 0.0468±0.00 | 0.3784±0.03 | 0.0438±0.02       | 0.6600±0.33       | -0.0952±0.02 |
+| SFM(Liheng Zhang, et al.)                | Alpha158                            | 0.0379±0.00 | 0.2959±0.04 | 0.0464±0.00 | 0.3825±0.04 | 0.0465±0.02       | 0.5672±0.29       | -0.1282±0.03 |
+| ALSTM (Yao Qin, et al.)                  | Alpha158(with selected 20 features) | 0.0362±0.01 | 0.2789±0.06 | 0.0463±0.01 | 0.3661±0.05 | 0.0470±0.03       | 0.6992±0.47       | -0.1072±0.03 |
+| GATs (Petar Velickovic, et al.)          | Alpha158(with selected 20 features) | 0.0349±0.00 | 0.2511±0.01 | 0.0462±0.00 | 0.3564±0.01 | 0.0497±0.01       | 0.7338±0.19       | -0.0777±0.02 |
+| TRA(Hengxu Lin, et al.)                  | Alpha158(with selected 20 features) | 0.0404±0.00 | 0.3197±0.05 | 0.0490±0.00 | 0.4047±0.04 | 0.0649±0.02       | 1.0091±0.30       | -0.0860±0.02 |
+| Linear                                   | Alpha158                            | 0.0397±0.00 | 0.3000±0.00 | 0.0472±0.00 | 0.3531±0.00 | 0.0692±0.00       | 0.9209±0.00       | -0.1509±0.00 |
+| TRA(Hengxu Lin, et al.)                  | Alpha158                            | 0.0440±0.00 | 0.3535±0.05 | 0.0540±0.00 | 0.4451±0.03 | 0.0718±0.02       | 1.0835±0.35       | -0.0760±0.02 |
+| CatBoost(Liudmila Prokhorenkova, et al.) | Alpha158                            | 0.0481±0.00 | 0.3366±0.00 | 0.0454±0.00 | 0.3311±0.00 | 0.0765±0.00       | 0.8032±0.01       | -0.1092±0.00 |
+| XGBoost(Tianqi Chen, et al.)             | Alpha158                            | 0.0498±0.00 | 0.3779±0.00 | 0.0505±0.00 | 0.4131±0.00 | 0.0780±0.00       | 0.9070±0.00       | -0.1168±0.00 |
+| TFT (Bryan Lim, et al.)                  | Alpha158(with selected 20 features) | 0.0358±0.00 | 0.2160±0.03 | 0.0116±0.01 | 0.0720±0.03 | 0.0847±0.02       | 0.8131±0.19       | -0.1824±0.03 |
+| MLP                                      | Alpha158                            | 0.0376±0.00 | 0.2846±0.02 | 0.0429±0.00 | 0.3220±0.01 | 0.0895±0.02       | 1.1408±0.23       | -0.1103±0.02 |
+| LightGBM(Guolin Ke, et al.)              | Alpha158                            | 0.0448±0.00 | 0.3660±0.00 | 0.0469±0.00 | 0.3877±0.00 | 0.0901±0.00       | 1.0164±0.00       | -0.1038±0.00 |
+| DoubleEnsemble(Chuheng Zhang, et al.)    | Alpha158                            | 0.0544±0.00 | 0.4340±0.00 | 0.0523±0.00 | 0.4284±0.01 | 0.1168±0.01       | 1.3384±0.12       | -0.1036±0.01 |
+
+
+
+## Alpha360 dataset
+
+| Model Name                                | Dataset  | IC          | ICIR        | Rank IC     | Rank ICIR   | Annualized Return | Information Ratio | Max Drawdown |
+|-------------------------------------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
+| Transformer(Ashish Vaswani, et al.)       | Alpha360 | 0.0114±0.00 | 0.0716±0.03 | 0.0327±0.00 | 0.2248±0.02 | -0.0270±0.03      | -0.3378±0.37      | -0.1653±0.05 |
+| TabNet(Sercan O. Arik, et al.)            | Alpha360 | 0.0099±0.00 | 0.0593±0.00 | 0.0290±0.00 | 0.1887±0.00 | -0.0369±0.00      | -0.3892±0.00      | -0.2145±0.00 |
+| MLP                                       | Alpha360 | 0.0273±0.00 | 0.1870±0.02 | 0.0396±0.00 | 0.2910±0.02 | 0.0029±0.02       | 0.0274±0.23       | -0.1385±0.03 |
+| Localformer(Juyong Jiang, et al.)         | Alpha360 | 0.0404±0.00 | 0.2932±0.04 | 0.0542±0.00 | 0.4110±0.03 | 0.0246±0.02       | 0.3211±0.21       | -0.1095±0.02 |
+| CatBoost(Liudmila Prokhorenkova, et al.)  | Alpha360 | 0.0378±0.00 | 0.2714±0.00 | 0.0467±0.00 | 0.3659±0.00 | 0.0292±0.00       | 0.3781±0.00       | -0.0862±0.00 |
+| XGBoost(Tianqi Chen, et al.)              | Alpha360 | 0.0394±0.00 | 0.2909±0.00 | 0.0448±0.00 | 0.3679±0.00 | 0.0344±0.00       | 0.4527±0.02       | -0.1004±0.00 |
+| DoubleEnsemble(Chuheng Zhang, et al.)     | Alpha360 | 0.0404±0.00 | 0.3023±0.00 | 0.0495±0.00 | 0.3898±0.00 | 0.0468±0.01       | 0.6302±0.20       | -0.0860±0.01 |
+| LightGBM(Guolin Ke, et al.)               | Alpha360 | 0.0400±0.00 | 0.3037±0.00 | 0.0499±0.00 | 0.4042±0.00 | 0.0558±0.00       | 0.7632±0.00       | -0.0659±0.00 |
+| ALSTM (Yao Qin, et al.)                   | Alpha360 | 0.0497±0.00 | 0.3829±0.04 | 0.0599±0.00 | 0.4736±0.03 | 0.0626±0.02       | 0.8651±0.31       | -0.0994±0.03 |
+| LSTM(Sepp Hochreiter, et al.)             | Alpha360 | 0.0448±0.00 | 0.3474±0.04 | 0.0549±0.00 | 0.4366±0.03 | 0.0647±0.03       | 0.8963±0.39       | -0.0875±0.02 |
+| GRU(Kyunghyun Cho, et al.)                | Alpha360 | 0.0493±0.00 | 0.3772±0.04 | 0.0584±0.00 | 0.4638±0.03 | 0.0720±0.02       | 0.9730±0.33       | -0.0821±0.02 |
+| TCTS(Xueqing Wu, et al.)                  | Alpha360 | 0.0454±0.01 | 0.3457±0.06 | 0.0566±0.01 | 0.4492±0.05 | 0.0744±0.03       | 1.0594±0.41       | -0.0761±0.03 |
+| GATs (Petar Velickovic, et al.)           | Alpha360 | 0.0476±0.00 | 0.3508±0.02 | 0.0598±0.00 | 0.4604±0.01 | 0.0824±0.02       | 1.1079±0.26       | -0.0894±0.03 |
+| TRA(Hengxu Lin, et al.)                   | Alpha360 | 0.0485±0.00 | 0.3787±0.03 | 0.0587±0.00 | 0.4756±0.03 | 0.0920±0.03       | 1.2789±0.42       | -0.0834±0.02 |

 - The selected 20 features are based on the feature importance of a lightgbm-based model.
 - The base model of DoubleEnsemble is LGBM.
--- a/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
+++ b/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
@@ -69,6 +69,7 @@ task:
            steps: 3
            target_label: 1
            lowest_valid_performance: 0.993
+            seed: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
+++ b/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
@@ -195,7 +195,8 @@ class Alpha158Formatter(GenericDataFormatter):

        for col in column_names:
            if col not in {"forecast_time", "identifier"}:
-                output[col] = self._target_scaler.inverse_transform(predictions[col])
+                # Index with [[col]] (not [col]) to align with the input format used when fitting
+                output[col] = self._target_scaler.inverse_transform(predictions[[col]])

        return output

--- a/examples/benchmarks/TFT/tft.py
+++ b/examples/benchmarks/TFT/tft.py
@@ -311,5 +311,11 @@ class TFTModel(ModelFT):
        # self.model.save(path)

        # save qlib model wrapper
-        self.model = None
+        drop_attrs = ["model", "tf_graph", "sess", "data_formatter"]
+        orig_attr = {}
+        for attr in drop_attrs:
+            orig_attr[attr] = getattr(self, attr)
+            setattr(self, attr, None)
        super(TFTModel, self).to_pickle(path)
+        for attr in drop_attrs:
+            setattr(self, attr, orig_attr[attr])
--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -38,7 +38,7 @@ class TRAModel(Model):
        model_init_state=None,
        lamb=0.0,
        rho=0.99,
-        seed=0,
+        seed=None,
        logdir=None,
        eval_train=True,
        eval_test=False,
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
@@ -50,6 +50,7 @@ task:
        kwargs:
            d_feat: 158
            pretrain: True
+            seed: 993
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
@@ -50,6 +50,7 @@ task:
        kwargs:
            d_feat: 360
            pretrain: True
+            seed: 993
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -151,6 +151,9 @@ def get_all_results(folders) -> dict:
            if recorders[recorder_id].status == "FINISHED":
                recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
                metrics = recorder.list_metrics()
+                if "1day.excess_return_with_cost.annualized_return" not in metrics:
+                    print(f"{recorder_id} is skipped due to incomplete result")
+                    continue
                result["annualized_return_with_cost"].append(metrics["1day.excess_return_with_cost.annualized_return"])
                result["information_ratio_with_cost"].append(metrics["1day.excess_return_with_cost.information_ratio"])
                result["max_drawdown_with_cost"].append(metrics["1day.excess_return_with_cost.max_drawdown"])
@@ -200,174 +203,183 @@ def gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir):
        return temp_path


-# function to run the all the models
-@only_allow_defined_args
-def run(
-    times=1,
-    models=None,
-    dataset="Alpha360",
-    exclude=False,
-    qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib",
-    exp_folder_name: str = "run_all_model_records",
-    wait_before_rm_env: bool = False,
-    wait_when_err: bool = False,
-):
-    """
-    Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
-    Any PR to enhance this method is highly welcomed. Besides, this script doesn't support parallel running the same model
-    for multiple times, and this will be fixed in the future development.
+class ModelRunner:
+    def _init_qlib(self, exp_folder_name):
+        # init qlib
+        GetData().qlib_data(exists_skip=True)
+        qlib.init(
+            exp_manager={
+                "class": "MLflowExpManager",
+                "module_path": "qlib.workflow.expm",
+                "kwargs": {
+                    "uri": "file:" + str(Path(os.getcwd()).resolve() / exp_folder_name),
+                    "default_exp_name": "Experiment",
+                },
+            }
+        )

-    Parameters:
-    -----------
-    times : int
-        determines how many times the model should be running.
-    models : str or list
-        determines the specific model or list of models to run or exclude.
-    exclude : boolean
-        determines whether the model being used is excluded or included.
-    dataset : str
-        determines the dataset to be used for each model.
-    qlib_uri : str
-        the uri to install qlib with pip
-        it could be url on the we or local path
-    exp_folder_name: str
-        the name of the experiment folder
-    wait_before_rm_env : bool
-        wait before remove environment.
-    wait_when_err : bool
-        wait when errors raised when executing commands
+    # function to run all the models
+    @only_allow_defined_args
+    def run(
+        self,
+        times=1,
+        models=None,
+        dataset="Alpha360",
+        exclude=False,
+        qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib",
+        exp_folder_name: str = "run_all_model_records",
+        wait_before_rm_env: bool = False,
+        wait_when_err: bool = False,
+    ):
+        """
+        Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
+        Any PR to enhance this method is highly welcomed. Besides, this script doesn't support parallel running the same model
+        for multiple times, and this will be fixed in the future development.

-    Usage:
-    -------
-    Here are some use cases of the function in the bash:
+        Parameters:
+        -----------
+        times : int
+            determines how many times the model should be running.
+        models : str or list
+            determines the specific model or list of models to run or exclude.
+        exclude : boolean
+            determines whether the model being used is excluded or included.
+        dataset : str
+            determines the dataset to be used for each model.
+        qlib_uri : str
+            the uri to install qlib with pip
+            it could be a URL on the web or a local path
+        exp_folder_name: str
+            the name of the experiment folder
+        wait_before_rm_env : bool
+            wait before remove environment.
+        wait_when_err : bool
+            wait when errors raised when executing commands

-    .. code-block:: bash
+        Usage:
+        -------
+        Here are some use cases of the function in the bash:

-        # Case 1 - run all models multiple times
-        python run_all_model.py 3
+        .. code-block:: bash

-        # Case 2 - run specific models multiple times
-        python run_all_model.py 3 mlp
+            # Case 1 - run all models multiple times
+            python run_all_model.py run 3

-        # Case 3 - run specific models multiple times with specific dataset
-        python run_all_model.py 3 mlp Alpha158
+            # Case 2 - run specific models multiple times
+            python run_all_model.py run 3 mlp

-        # Case 4 - run other models except those are given as arguments for multiple times
-        python run_all_model.py 3 [mlp,tft,lstm] --exclude=True
+            # Case 3 - run specific models multiple times with specific dataset
+            python run_all_model.py run 3 mlp Alpha158

-        # Case 5 - run specific models for one time
-        python run_all_model.py --models=[mlp,lightgbm]
+            # Case 4 - run other models except those are given as arguments for multiple times
+            python run_all_model.py run 3 [mlp,tft,lstm] --exclude=True

-        # Case 6 - run other models except those are given as arguments for one time
-        python run_all_model.py --models=[mlp,tft,sfm] --exclude=True
+            # Case 5 - run specific models for one time
+            python run_all_model.py run --models=[mlp,lightgbm]

-    """
-    # init qlib
-    GetData().qlib_data(exists_skip=True)
-    qlib.init(
-        exp_manager={
-            "class": "MLflowExpManager",
-            "module_path": "qlib.workflow.expm",
-            "kwargs": {
-                "uri": "file:" + str(Path(os.getcwd()).resolve() / exp_folder_name),
-                "default_exp_name": "Experiment",
-            },
-        }
-    )
+            # Case 6 - run other models except those are given as arguments for one time
+            python run_all_model.py run --models=[mlp,tft,sfm] --exclude=True

-    # get all folders
-    folders = get_all_folders(models, exclude)
-    # init error messages:
-    errors = dict()
-    # run all the model for iterations
-    for fn in folders:
-        # get all files
-        sys.stderr.write("Retrieving files...\n")
-        yaml_path, req_path = get_all_files(folders[fn], dataset)
-        if yaml_path is None:
-            sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}")
-            continue
-        sys.stderr.write("\n")
-        # create env by anaconda
-        temp_dir, env_path, python_path, conda_activate = create_env()
+        """
+        self._init_qlib(exp_folder_name)

-        # install requirements.txt
-        sys.stderr.write("Installing requirements.txt...\n")
-        with open(req_path) as f:
-            content = f.read()
-        if "torch" in content:
-            # automatically install pytorch according to nvidia's version
-            execute(
-                f"{python_path} -m pip install light-the-torch", wait_when_err=wait_when_err
-            )  # for automatically installing torch according to the nvidia driver
-            execute(
-                f"{env_path / 'bin' / 'ltt'} install --install-cmd '{python_path} -m pip install {{packages}}' -- -r {req_path}",
-                wait_when_err=wait_when_err,
-            )
-        else:
-            execute(f"{python_path} -m pip install -r {req_path}", wait_when_err=wait_when_err)
-        sys.stderr.write("\n")
-
-        # read yaml, remove seed kwargs of model, and then save file in the temp_dir
-        yaml_path = gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir)
-        # setup gpu for tft
-        if fn == "TFT":
-            execute(
-                f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn",
-                wait_when_err=wait_when_err,
-            )
+        # get all folders
+        folders = get_all_folders(models, exclude)
+        # init error messages:
+        errors = dict()
+        # run all the model for iterations
+        for fn in folders:
+            # get all files
+            sys.stderr.write("Retrieving files...\n")
+            yaml_path, req_path = get_all_files(folders[fn], dataset)
+            if yaml_path is None:
+                sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}")
+                continue
            sys.stderr.write("\n")
-        # install qlib
-        sys.stderr.write("Installing qlib...\n")
-        execute(f"{python_path} -m pip install --upgrade pip", wait_when_err=wait_when_err)  # TODO: FIX ME!
-        execute(f"{python_path} -m pip install --upgrade cython", wait_when_err=wait_when_err)  # TODO: FIX ME!
-        if fn == "TFT":
-            execute(
-                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e {qlib_uri}",
-                wait_when_err=wait_when_err,
-            )  # TODO: FIX ME!
-        else:
-            execute(
-                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e {qlib_uri}",
-                wait_when_err=wait_when_err,
-            )  # TODO: FIX ME!
-        sys.stderr.write("\n")
-        # run workflow_by_config for multiple times
-        for i in range(times):
-            sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n")
-            errs = execute(
-                f"{python_path} {env_path / 'bin' / 'qrun'} {yaml_path} {fn} {exp_folder_name}",
-                wait_when_err=wait_when_err,
-            )
-            if errs is not None:
-                _errs = errors.get(fn, {})
-                _errs.update({i: errs})
-                errors[fn] = _errs
+            # create env by anaconda
+            temp_dir, env_path, python_path, conda_activate = create_env()
+
+            # install requirements.txt
+            sys.stderr.write("Installing requirements.txt...\n")
+            with open(req_path) as f:
+                content = f.read()
+            if "torch" in content:
+                # automatically install pytorch according to nvidia's version
+                execute(
+                    f"{python_path} -m pip install light-the-torch", wait_when_err=wait_when_err
+                )  # for automatically installing torch according to the nvidia driver
+                execute(
+                    f"{env_path / 'bin' / 'ltt'} install --install-cmd '{python_path} -m pip install {{packages}}' -- -r {req_path}",
+                    wait_when_err=wait_when_err,
+                )
+            else:
+                execute(f"{python_path} -m pip install -r {req_path}", wait_when_err=wait_when_err)
+            sys.stderr.write("\n")
+
+            # read yaml, remove seed kwargs of model, and then save file in the temp_dir
+            yaml_path = gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir)
+            # setup gpu for tft
+            if fn == "TFT":
+                execute(
+                    f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn",
+                    wait_when_err=wait_when_err,
+                )
+                sys.stderr.write("\n")
+            # install qlib
+            sys.stderr.write("Installing qlib...\n")
+            execute(f"{python_path} -m pip install --upgrade pip", wait_when_err=wait_when_err)  # TODO: FIX ME!
+            execute(f"{python_path} -m pip install --upgrade cython", wait_when_err=wait_when_err)  # TODO: FIX ME!
+            if fn == "TFT":
+                execute(
+                    f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e {qlib_uri}",
+                    wait_when_err=wait_when_err,
+                )  # TODO: FIX ME!
+            else:
+                execute(
+                    f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e {qlib_uri}",
+                    wait_when_err=wait_when_err,
+                )  # TODO: FIX ME!
+            sys.stderr.write("\n")
+            # run workflow_by_config for multiple times
+            for i in range(times):
+                sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n")
+                errs = execute(
+                    f"{python_path} {env_path / 'bin' / 'qrun'} {yaml_path} {fn} {exp_folder_name}",
+                    wait_when_err=wait_when_err,
+                )
+                if errs is not None:
+                    _errs = errors.get(fn, {})
+                    _errs.update({i: errs})
+                    errors[fn] = _errs
+                sys.stderr.write("\n")
+            # remove env
+            sys.stderr.write(f"Deleting the environment: {env_path}...\n")
+            if wait_before_rm_env:
+                input("Press Enter to Continue")
+            shutil.rmtree(env_path)
+        # print errors
+        sys.stderr.write(f"Here are some of the errors of the models...\n")
+        pprint(errors)
+        self._collect_results(exp_folder_name, dataset)
+
+    def _collect_results(self, exp_folder_name, dataset):
+        folders = get_all_folders(exp_folder_name, dataset)
+        # getting all results
+        sys.stderr.write(f"Retrieving results...\n")
+        results = get_all_results(folders)
+        if len(results) > 0:
+            # calculating the mean and std
+            sys.stderr.write(f"Calculating the mean and std of results...\n")
+            results = cal_mean_std(results)
+            # generating md table
+            sys.stderr.write(f"Generating markdown table...\n")
+            gen_and_save_md_table(results, dataset)
            sys.stderr.write("\n")
-        # remove env
-        sys.stderr.write(f"Deleting the environment: {env_path}...\n")
-        if wait_before_rm_env:
-            input("Press Enter to Continue")
-        shutil.rmtree(env_path)
-    # getting all results
-    sys.stderr.write(f"Retrieving results...\n")
-    results = get_all_results(folders)
-    if len(results) > 0:
-        # calculating the mean and std
-        sys.stderr.write(f"Calculating the mean and std of results...\n")
-        results = cal_mean_std(results)
-        # generating md table
-        sys.stderr.write(f"Generating markdown table...\n")
-        gen_and_save_md_table(results, dataset)
        sys.stderr.write("\n")
-    # print errors
-    sys.stderr.write(f"Here are some of the errors of the models...\n")
-    pprint(errors)
-    sys.stderr.write("\n")
-    # move results folder
-    shutil.move(exp_folder_name, exp_folder_name + f"_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}")
-    shutil.move("table.md", f"table_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}.md")
+        # move results folder
+        shutil.move(exp_folder_name, exp_folder_name + f"_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}")
+        shutil.move("table.md", f"table_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}.md")


 if __name__ == "__main__":
-    fire.Fire(run)  # run all the model
+    fire.Fire(ModelRunner)  # run all the model
--- a/qlib/contrib/model/pytorch_tcts.py
+++ b/qlib/contrib/model/pytorch_tcts.py
@@ -61,7 +61,7 @@ class TCTS(Model):
        weight_lr=5e-7,
        steps=3,
        GPU=0,
-        seed=0,
+        seed=None,
        target_label=0,
        lowest_valid_performance=0.993,
        **kwargs
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -74,7 +74,7 @@ class TRAModel(Model):
        lamb=0.0,
        rho=0.99,
        alpha=1.0,
-        seed=0,
+        seed=None,
        logdir=None,
        eval_train=False,
        eval_test=False,
@@ -99,8 +99,9 @@ class TRAModel(Model):
        if transport_method == "router" and not eval_train:
            self.logger.warning("`eval_train` will be ignored when using TRA.router")

-        np.random.seed(seed)
-        torch.manual_seed(seed)
+        if seed is not None:
+            np.random.seed(seed)
+            torch.manual_seed(seed)

        self.model_config = model_config
        self.tra_config = tra_config
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -7,6 +7,7 @@ if TYPE_CHECKING:
    from qlib.backtest.exchange import Exchange
    from qlib.backtest.position import BasePosition
 from typing import List, Tuple, Union
+import pandas as pd

 from ..model.base import BaseModel
 from ..data.dataset import DatasetH
@@ -219,6 +220,8 @@ class ModelStrategy(BaseStrategy):
        self.model = model
        self.dataset = dataset
        self.pred_scores = convert_index_format(self.model.predict(dataset), level="datetime")
+        if isinstance(self.pred_scores, pd.DataFrame):
+            self.pred_scores = self.pred_scores.iloc[:, 0]

    def _update_model(self):
        """