diff --git a/README.md b/README.md index 820324e3d..0f9cccceb 100644 --- a/README.md +++ b/README.md @@ -306,7 +306,7 @@ All the models listed above are runnable with ``Qlib``. Users can find the confi - Users can use the tool `qrun` mentioned above to run a model's workflow based from a config file. - Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder. -- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). +- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py run --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). - **NOTE**: Each baseline has different environment dependencies, please make sure that your python version aligns with the requirements(e.g. TFT only supports Python 3.6~3.7 due to the limitation of `tensorflow==1.15.0`) ## Run multiple models @@ -316,7 +316,7 @@ The script will create a unique virtual environment for each model, and delete t Here is an example of running all the models for 10 iterations: ```python -python run_all_model.py 10 +python run_all_model.py run 10 ``` It also provides the API to run specific models at once. 
For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index cd4276781..a18b46854 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -8,44 +8,48 @@ The numbers shown below demonstrate the performance of the entire `workflow` of > > In the new version of qlib, the default dataset is **v2**. Since the data is collected from the YahooFinance API (which is not very stable), the results of *v2* and *v1* may differ -## Alpha360 dataset -| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | -|---|---|---|---|---|---|---|---|---| -| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 | -| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 | -| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 | -| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 | -| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 | -| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 | -| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 | -| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 | -| GATs (Petar Velickovic, et al.) 
| Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 | -| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 | -| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 | -| TCTS (Xueqing Wu, et al.)| Alpha360 | 0.0485±0.00 | 0.3689±0.04| 0.0586±0.00 | 0.4669±0.02 | 0.0816±0.02 | 1.1572±0.30| -0.0689±0.02 | -| Transformer (Ashish Vaswani, et al.)| Alpha360 | 0.0141±0.00 | 0.0917±0.02| 0.0331±0.00 | 0.2357±0.03 | -0.0259±0.03 | -0.3323±0.43| -0.1763±0.07 | -| Localformer (Juyong Jiang, et al.)| Alpha360 | 0.0408±0.00 | 0.2988±0.03| 0.0538±0.00 | 0.4105±0.02 | 0.0275±0.03 | 0.3464±0.37| -0.1182±0.03 | -| TRA (Hengxu Lin, et al.)| Alpha360 | 0.0491±0.01 | 0.3868±0.06 | 0.0589±0.00 | 0.4802±0.04 | 0.0898±0.02 | 1.2490±0.32 | -0.0778±0.02 | - ## Alpha158 dataset -| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | -|---|---|---|---|---|---|---|---|---| -| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 | -| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 | -| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 | -| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 | -| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 | -| TFT (Bryan Lim, et al.) 
| Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 | -| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 | -| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 | -| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 | -| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 | -| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 | -| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 | -| Transformer (Ashish Vaswani, et al.)| Alpha158 | 0.0274±0.00 | 0.2166±0.04| 0.0409±0.00 | 0.3342±0.04 | 0.0204±0.03 | 0.2888±0.40| -0.1216±0.04 | -| Localformer (Juyong Jiang, et al.)| Alpha158 | 0.0355±0.00 | 0.2747±0.04| 0.0466±0.00 | 0.3762±0.03 | 0.0506±0.02 | 0.7447±0.34| -0.0875±0.02 | -| TRA (Hengxu Lin, et al.)| Alpha158 (with selected 20 features)| 0.0409±0.00 | 0.3253±0.04 | 0.0488±0.00 | 0.4045±0.02 | 0.0673±0.02 | 1.0389±0.39 | -0.0830±0.02 | -| TRA (Hengxu Lin, et al.)| Alpha158 | 0.0442±0.00 | 0.3426±0.03 | 0.0555±0.00 | 0.4395±0.03 | 0.0833±0.03 | 1.2064±0.36 | -0.0849±0.02 | + +| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | 
+|------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| +| TabNet(Sercan O. Arik, et al.) | Alpha158 | 0.0204±0.01 | 0.1554±0.07 | 0.0333±0.00 | 0.2552±0.05 | 0.0227±0.04 | 0.3676±0.54 | -0.1089±0.08 | +| Transformer(Ashish Vaswani, et al.) | Alpha158 | 0.0264±0.00 | 0.2053±0.02 | 0.0407±0.00 | 0.3273±0.02 | 0.0273±0.02 | 0.3970±0.26 | -0.1101±0.02 | +| GRU(Kyunghyun Cho, et al.) | Alpha158(with selected 20 features) | 0.0315±0.00 | 0.2450±0.04 | 0.0428±0.00 | 0.3440±0.03 | 0.0344±0.02 | 0.5160±0.25 | -0.1017±0.02 | +| LSTM(Sepp Hochreiter, et al.) | Alpha158(with selected 20 features) | 0.0318±0.00 | 0.2367±0.04 | 0.0435±0.00 | 0.3389±0.03 | 0.0381±0.03 | 0.5561±0.46 | -0.1207±0.04 | +| Localformer(Juyong Jiang, et al.) | Alpha158 | 0.0356±0.00 | 0.2756±0.03 | 0.0468±0.00 | 0.3784±0.03 | 0.0438±0.02 | 0.6600±0.33 | -0.0952±0.02 | +| SFM(Liheng Zhang, et al.) | Alpha158 | 0.0379±0.00 | 0.2959±0.04 | 0.0464±0.00 | 0.3825±0.04 | 0.0465±0.02 | 0.5672±0.29 | -0.1282±0.03 | +| ALSTM (Yao Qin, et al.) | Alpha158(with selected 20 features) | 0.0362±0.01 | 0.2789±0.06 | 0.0463±0.01 | 0.3661±0.05 | 0.0470±0.03 | 0.6992±0.47 | -0.1072±0.03 | +| GATs (Petar Velickovic, et al.) | Alpha158(with selected 20 features) | 0.0349±0.00 | 0.2511±0.01 | 0.0462±0.00 | 0.3564±0.01 | 0.0497±0.01 | 0.7338±0.19 | -0.0777±0.02 | +| TRA(Hengxu Lin, et al.) | Alpha158(with selected 20 features) | 0.0404±0.00 | 0.3197±0.05 | 0.0490±0.00 | 0.4047±0.04 | 0.0649±0.02 | 1.0091±0.30 | -0.0860±0.02 | +| Linear | Alpha158 | 0.0397±0.00 | 0.3000±0.00 | 0.0472±0.00 | 0.3531±0.00 | 0.0692±0.00 | 0.9209±0.00 | -0.1509±0.00 | +| TRA(Hengxu Lin, et al.) | Alpha158 | 0.0440±0.00 | 0.3535±0.05 | 0.0540±0.00 | 0.4451±0.03 | 0.0718±0.02 | 1.0835±0.35 | -0.0760±0.02 | +| CatBoost(Liudmila Prokhorenkova, et al.) 
| Alpha158 | 0.0481±0.00 | 0.3366±0.00 | 0.0454±0.00 | 0.3311±0.00 | 0.0765±0.00 | 0.8032±0.01 | -0.1092±0.00 | +| XGBoost(Tianqi Chen, et al.) | Alpha158 | 0.0498±0.00 | 0.3779±0.00 | 0.0505±0.00 | 0.4131±0.00 | 0.0780±0.00 | 0.9070±0.00 | -0.1168±0.00 | +| TFT (Bryan Lim, et al.) | Alpha158(with selected 20 features) | 0.0358±0.00 | 0.2160±0.03 | 0.0116±0.01 | 0.0720±0.03 | 0.0847±0.02 | 0.8131±0.19 | -0.1824±0.03 | +| MLP | Alpha158 | 0.0376±0.00 | 0.2846±0.02 | 0.0429±0.00 | 0.3220±0.01 | 0.0895±0.02 | 1.1408±0.23 | -0.1103±0.02 | +| LightGBM(Guolin Ke, et al.) | Alpha158 | 0.0448±0.00 | 0.3660±0.00 | 0.0469±0.00 | 0.3877±0.00 | 0.0901±0.00 | 1.0164±0.00 | -0.1038±0.00 | +| DoubleEnsemble(Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4340±0.00 | 0.0523±0.00 | 0.4284±0.01 | 0.1168±0.01 | 1.3384±0.12 | -0.1036±0.01 | + + + +## Alpha360 dataset + +| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | +|-------------------------------------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| +| Transformer(Ashish Vaswani, et al.) | Alpha360 | 0.0114±0.00 | 0.0716±0.03 | 0.0327±0.00 | 0.2248±0.02 | -0.0270±0.03 | -0.3378±0.37 | -0.1653±0.05 | +| TabNet(Sercan O. Arik, et al.) | Alpha360 | 0.0099±0.00 | 0.0593±0.00 | 0.0290±0.00 | 0.1887±0.00 | -0.0369±0.00 | -0.3892±0.00 | -0.2145±0.00 | +| MLP | Alpha360 | 0.0273±0.00 | 0.1870±0.02 | 0.0396±0.00 | 0.2910±0.02 | 0.0029±0.02 | 0.0274±0.23 | -0.1385±0.03 | +| Localformer(Juyong Jiang, et al.) | Alpha360 | 0.0404±0.00 | 0.2932±0.04 | 0.0542±0.00 | 0.4110±0.03 | 0.0246±0.02 | 0.3211±0.21 | -0.1095±0.02 | +| CatBoost(Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0378±0.00 | 0.2714±0.00 | 0.0467±0.00 | 0.3659±0.00 | 0.0292±0.00 | 0.3781±0.00 | -0.0862±0.00 | +| XGBoost(Tianqi Chen, et al.) 
| Alpha360 | 0.0394±0.00 | 0.2909±0.00 | 0.0448±0.00 | 0.3679±0.00 | 0.0344±0.00 | 0.4527±0.02 | -0.1004±0.00 | +| DoubleEnsemble(Chuheng Zhang, et al.) | Alpha360 | 0.0404±0.00 | 0.3023±0.00 | 0.0495±0.00 | 0.3898±0.00 | 0.0468±0.01 | 0.6302±0.20 | -0.0860±0.01 | +| LightGBM(Guolin Ke, et al.) | Alpha360 | 0.0400±0.00 | 0.3037±0.00 | 0.0499±0.00 | 0.4042±0.00 | 0.0558±0.00 | 0.7632±0.00 | -0.0659±0.00 | +| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0497±0.00 | 0.3829±0.04 | 0.0599±0.00 | 0.4736±0.03 | 0.0626±0.02 | 0.8651±0.31 | -0.0994±0.03 | +| LSTM(Sepp Hochreiter, et al.) | Alpha360 | 0.0448±0.00 | 0.3474±0.04 | 0.0549±0.00 | 0.4366±0.03 | 0.0647±0.03 | 0.8963±0.39 | -0.0875±0.02 | +| GRU(Kyunghyun Cho, et al.) | Alpha360 | 0.0493±0.00 | 0.3772±0.04 | 0.0584±0.00 | 0.4638±0.03 | 0.0720±0.02 | 0.9730±0.33 | -0.0821±0.02 | +| TCTS(Xueqing Wu, et al.) | Alpha360 | 0.0454±0.01 | 0.3457±0.06 | 0.0566±0.01 | 0.4492±0.05 | 0.0744±0.03 | 1.0594±0.41 | -0.0761±0.03 | +| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0476±0.00 | 0.3508±0.02 | 0.0598±0.00 | 0.4604±0.01 | 0.0824±0.02 | 1.1079±0.26 | -0.0894±0.03 | +| TRA(Hengxu Lin, et al.) | Alpha360 | 0.0485±0.00 | 0.3787±0.03 | 0.0587±0.00 | 0.4756±0.03 | 0.0920±0.03 | 1.2789±0.42 | -0.0834±0.02 | - The selected 20 features are based on the feature importance of a lightgbm-based model. - The base model of DoubleEnsemble is LGBM. 
diff --git a/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml b/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml index 7ca6e937f..cd3bbf59c 100644 --- a/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml +++ b/examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml @@ -69,6 +69,7 @@ task: steps: 3 target_label: 1 lowest_valid_performance: 0.993 + seed: 0 dataset: class: DatasetH module_path: qlib.data.dataset diff --git a/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py b/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py index a15e3b740..a2afcc814 100644 --- a/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py +++ b/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py @@ -195,7 +195,8 @@ class Alpha158Formatter(GenericDataFormatter): for col in column_names: if col not in {"forecast_time", "identifier"}: - output[col] = self._target_scaler.inverse_transform(predictions[col]) + # Using [col] is for aligning with the format when fitting + output[col] = self._target_scaler.inverse_transform(predictions[[col]]) return output diff --git a/examples/benchmarks/TFT/tft.py b/examples/benchmarks/TFT/tft.py index a854c2dd9..cdc7f17e9 100644 --- a/examples/benchmarks/TFT/tft.py +++ b/examples/benchmarks/TFT/tft.py @@ -311,5 +311,11 @@ class TFTModel(ModelFT): # self.model.save(path) # save qlib model wrapper - self.model = None + drop_attrs = ["model", "tf_graph", "sess", "data_formatter"] + orig_attr = {} + for attr in drop_attrs: + orig_attr[attr] = getattr(self, attr) + setattr(self, attr, None) super(TFTModel, self).to_pickle(path) + for attr in drop_attrs: + setattr(self, attr, orig_attr[attr]) diff --git a/examples/benchmarks/TRA/src/model.py b/examples/benchmarks/TRA/src/model.py index b1b5e27f6..a0e6350e4 100644 --- a/examples/benchmarks/TRA/src/model.py +++ b/examples/benchmarks/TRA/src/model.py @@ -38,7 +38,7 @@ class TRAModel(Model): model_init_state=None, lamb=0.0, rho=0.99, - seed=0, + seed=None, 
logdir=None, eval_train=True, eval_test=False, diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml index 0fa1b23d5..1d1c7da1c 100644 --- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml +++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml @@ -50,6 +50,7 @@ task: kwargs: d_feat: 158 pretrain: True + seed: 993 dataset: class: DatasetH module_path: qlib.data.dataset diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml index 0c798ae30..3d11efe60 100644 --- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml +++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml @@ -50,6 +50,7 @@ task: kwargs: d_feat: 360 pretrain: True + seed: 993 dataset: class: DatasetH module_path: qlib.data.dataset diff --git a/examples/run_all_model.py b/examples/run_all_model.py index 41aba091e..483788372 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -151,6 +151,9 @@ def get_all_results(folders) -> dict: if recorders[recorder_id].status == "FINISHED": recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn) metrics = recorder.list_metrics() + if "1day.excess_return_with_cost.annualized_return" not in metrics: + print(f"{recorder_id} is skipped due to incomplete result") + continue result["annualized_return_with_cost"].append(metrics["1day.excess_return_with_cost.annualized_return"]) result["information_ratio_with_cost"].append(metrics["1day.excess_return_with_cost.information_ratio"]) result["max_drawdown_with_cost"].append(metrics["1day.excess_return_with_cost.max_drawdown"]) @@ -200,174 +203,183 @@ def gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir): return temp_path -# function to run the all the models -@only_allow_defined_args -def run( - times=1, - models=None, - dataset="Alpha360", - exclude=False, - 
qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib", - exp_folder_name: str = "run_all_model_records", - wait_before_rm_env: bool = False, - wait_when_err: bool = False, -): - """ - Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future. - Any PR to enhance this method is highly welcomed. Besides, this script doesn't support parallel running the same model - for multiple times, and this will be fixed in the future development. +class ModelRunner: + def _init_qlib(self, exp_folder_name): + # init qlib + GetData().qlib_data(exists_skip=True) + qlib.init( + exp_manager={ + "class": "MLflowExpManager", + "module_path": "qlib.workflow.expm", + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / exp_folder_name), + "default_exp_name": "Experiment", + }, + } + ) - Parameters: - ----------- - times : int - determines how many times the model should be running. - models : str or list - determines the specific model or list of models to run or exclude. - exclude : boolean - determines whether the model being used is excluded or included. - dataset : str - determines the dataset to be used for each model. - qlib_uri : str - the uri to install qlib with pip - it could be url on the we or local path - exp_folder_name: str - the name of the experiment folder - wait_before_rm_env : bool - wait before remove environment. - wait_when_err : bool - wait when errors raised when executing commands + # function to run the all the models + @only_allow_defined_args + def run( + self, + times=1, + models=None, + dataset="Alpha360", + exclude=False, + qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib", + exp_folder_name: str = "run_all_model_records", + wait_before_rm_env: bool = False, + wait_when_err: bool = False, + ): + """ + Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future. + Any PR to enhance this method is highly welcomed. 
Besides, this script doesn't support parallel running the same model + for multiple times, and this will be fixed in the future development. - Usage: - ------- - Here are some use cases of the function in the bash: + Parameters: + ----------- + times : int + determines how many times the model should be running. + models : str or list + determines the specific model or list of models to run or exclude. + exclude : boolean + determines whether the model being used is excluded or included. + dataset : str + determines the dataset to be used for each model. + qlib_uri : str + the uri to install qlib with pip + it could be a URL on the web or a local path + exp_folder_name: str + the name of the experiment folder + wait_before_rm_env : bool + wait before remove environment. + wait_when_err : bool + wait when errors raised when executing commands - .. code-block:: bash + Usage: + ------- + Here are some use cases of the function in the bash: - # Case 1 - run all models multiple times - python run_all_model.py 3 + .. 
code-block:: bash - # Case 2 - run specific models multiple times - python run_all_model.py 3 mlp + # Case 1 - run all models multiple times + python run_all_model.py run 3 - # Case 3 - run specific models multiple times with specific dataset - python run_all_model.py 3 mlp Alpha158 + # Case 2 - run specific models multiple times + python run_all_model.py run 3 mlp - # Case 4 - run other models except those are given as arguments for multiple times - python run_all_model.py 3 [mlp,tft,lstm] --exclude=True + # Case 3 - run specific models multiple times with specific dataset + python run_all_model.py run 3 mlp Alpha158 - # Case 5 - run specific models for one time - python run_all_model.py --models=[mlp,lightgbm] + # Case 4 - run other models except those are given as arguments for multiple times + python run_all_model.py run 3 [mlp,tft,lstm] --exclude=True - # Case 6 - run other models except those are given as arguments for one time - python run_all_model.py --models=[mlp,tft,sfm] --exclude=True + # Case 5 - run specific models for one time + python run_all_model.py run --models=[mlp,lightgbm] - """ - # init qlib - GetData().qlib_data(exists_skip=True) - qlib.init( - exp_manager={ - "class": "MLflowExpManager", - "module_path": "qlib.workflow.expm", - "kwargs": { - "uri": "file:" + str(Path(os.getcwd()).resolve() / exp_folder_name), - "default_exp_name": "Experiment", - }, - } - ) + # Case 6 - run other models except those are given as arguments for one time + python run_all_model.py run --models=[mlp,tft,sfm] --exclude=True - # get all folders - folders = get_all_folders(models, exclude) - # init error messages: - errors = dict() - # run all the model for iterations - for fn in folders: - # get all files - sys.stderr.write("Retrieving files...\n") - yaml_path, req_path = get_all_files(folders[fn], dataset) - if yaml_path is None: - sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}") - continue - sys.stderr.write("\n") - # create env by anaconda 
- temp_dir, env_path, python_path, conda_activate = create_env() + """ + self._init_qlib(exp_folder_name) - # install requirements.txt - sys.stderr.write("Installing requirements.txt...\n") - with open(req_path) as f: - content = f.read() - if "torch" in content: - # automatically install pytorch according to nvidia's version - execute( - f"{python_path} -m pip install light-the-torch", wait_when_err=wait_when_err - ) # for automatically installing torch according to the nvidia driver - execute( - f"{env_path / 'bin' / 'ltt'} install --install-cmd '{python_path} -m pip install {{packages}}' -- -r {req_path}", - wait_when_err=wait_when_err, - ) - else: - execute(f"{python_path} -m pip install -r {req_path}", wait_when_err=wait_when_err) - sys.stderr.write("\n") - - # read yaml, remove seed kwargs of model, and then save file in the temp_dir - yaml_path = gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir) - # setup gpu for tft - if fn == "TFT": - execute( - f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn", - wait_when_err=wait_when_err, - ) + # get all folders + folders = get_all_folders(models, exclude) + # init error messages: + errors = dict() + # run all the model for iterations + for fn in folders: + # get all files + sys.stderr.write("Retrieving files...\n") + yaml_path, req_path = get_all_files(folders[fn], dataset) + if yaml_path is None: + sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}") + continue sys.stderr.write("\n") - # install qlib - sys.stderr.write("Installing qlib...\n") - execute(f"{python_path} -m pip install --upgrade pip", wait_when_err=wait_when_err) # TODO: FIX ME! - execute(f"{python_path} -m pip install --upgrade cython", wait_when_err=wait_when_err) # TODO: FIX ME! 
- if fn == "TFT": - execute( - f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e {qlib_uri}", - wait_when_err=wait_when_err, - ) # TODO: FIX ME! - else: - execute( - f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e {qlib_uri}", - wait_when_err=wait_when_err, - ) # TODO: FIX ME! - sys.stderr.write("\n") - # run workflow_by_config for multiple times - for i in range(times): - sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n") - errs = execute( - f"{python_path} {env_path / 'bin' / 'qrun'} {yaml_path} {fn} {exp_folder_name}", - wait_when_err=wait_when_err, - ) - if errs is not None: - _errs = errors.get(fn, {}) - _errs.update({i: errs}) - errors[fn] = _errs + # create env by anaconda + temp_dir, env_path, python_path, conda_activate = create_env() + + # install requirements.txt + sys.stderr.write("Installing requirements.txt...\n") + with open(req_path) as f: + content = f.read() + if "torch" in content: + # automatically install pytorch according to nvidia's version + execute( + f"{python_path} -m pip install light-the-torch", wait_when_err=wait_when_err + ) # for automatically installing torch according to the nvidia driver + execute( + f"{env_path / 'bin' / 'ltt'} install --install-cmd '{python_path} -m pip install {{packages}}' -- -r {req_path}", + wait_when_err=wait_when_err, + ) + else: + execute(f"{python_path} -m pip install -r {req_path}", wait_when_err=wait_when_err) + sys.stderr.write("\n") + + # read yaml, remove seed kwargs of model, and then save file in the temp_dir + yaml_path = gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir) + # setup gpu for tft + if fn == "TFT": + execute( + f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn", + wait_when_err=wait_when_err, + ) + sys.stderr.write("\n") + # install qlib + sys.stderr.write("Installing qlib...\n") + execute(f"{python_path} -m pip 
install --upgrade pip", wait_when_err=wait_when_err) # TODO: FIX ME! + execute(f"{python_path} -m pip install --upgrade cython", wait_when_err=wait_when_err) # TODO: FIX ME! + if fn == "TFT": + execute( + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e {qlib_uri}", + wait_when_err=wait_when_err, + ) # TODO: FIX ME! + else: + execute( + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e {qlib_uri}", + wait_when_err=wait_when_err, + ) # TODO: FIX ME! + sys.stderr.write("\n") + # run workflow_by_config for multiple times + for i in range(times): + sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n") + errs = execute( + f"{python_path} {env_path / 'bin' / 'qrun'} {yaml_path} {fn} {exp_folder_name}", + wait_when_err=wait_when_err, + ) + if errs is not None: + _errs = errors.get(fn, {}) + _errs.update({i: errs}) + errors[fn] = _errs + sys.stderr.write("\n") + # remove env + sys.stderr.write(f"Deleting the environment: {env_path}...\n") + if wait_before_rm_env: + input("Press Enter to Continue") + shutil.rmtree(env_path) + # print errors + sys.stderr.write(f"Here are some of the errors of the models...\n") + pprint(errors) + self._collect_results(exp_folder_name, dataset) + + def _collect_results(self, exp_folder_name, dataset): + folders = get_all_folders(exp_folder_name, dataset) + # getting all results + sys.stderr.write(f"Retrieving results...\n") + results = get_all_results(folders) + if len(results) > 0: + # calculating the mean and std + sys.stderr.write(f"Calculating the mean and std of results...\n") + results = cal_mean_std(results) + # generating md table + sys.stderr.write(f"Generating markdown table...\n") + gen_and_save_md_table(results, dataset) sys.stderr.write("\n") - # remove env - sys.stderr.write(f"Deleting the environment: {env_path}...\n") - if wait_before_rm_env: - input("Press Enter to Continue") - shutil.rmtree(env_path) - # getting all results - 
sys.stderr.write(f"Retrieving results...\n") - results = get_all_results(folders) - if len(results) > 0: - # calculating the mean and std - sys.stderr.write(f"Calculating the mean and std of results...\n") - results = cal_mean_std(results) - # generating md table - sys.stderr.write(f"Generating markdown table...\n") - gen_and_save_md_table(results, dataset) sys.stderr.write("\n") - # print errors - sys.stderr.write(f"Here are some of the errors of the models...\n") - pprint(errors) - sys.stderr.write("\n") - # move results folder - shutil.move(exp_folder_name, exp_folder_name + f"_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}") - shutil.move("table.md", f"table_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}.md") + # move results folder + shutil.move(exp_folder_name, exp_folder_name + f"_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}") + shutil.move("table.md", f"table_{dataset}_{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}.md") if __name__ == "__main__": - fire.Fire(run) # run all the model + fire.Fire(ModelRunner) # run all the model diff --git a/qlib/contrib/model/pytorch_tcts.py b/qlib/contrib/model/pytorch_tcts.py index bf46660ea..da7fda5f5 100644 --- a/qlib/contrib/model/pytorch_tcts.py +++ b/qlib/contrib/model/pytorch_tcts.py @@ -61,7 +61,7 @@ class TCTS(Model): weight_lr=5e-7, steps=3, GPU=0, - seed=0, + seed=None, target_label=0, lowest_valid_performance=0.993, **kwargs diff --git a/qlib/contrib/model/pytorch_tra.py b/qlib/contrib/model/pytorch_tra.py index 5a583a965..8d1e28410 100644 --- a/qlib/contrib/model/pytorch_tra.py +++ b/qlib/contrib/model/pytorch_tra.py @@ -74,7 +74,7 @@ class TRAModel(Model): lamb=0.0, rho=0.99, alpha=1.0, - seed=0, + seed=None, logdir=None, eval_train=False, eval_test=False, @@ -99,8 +99,9 @@ class TRAModel(Model): if transport_method == "router" and not eval_train: self.logger.warning("`eval_train` will be ignored when using TRA.router") - np.random.seed(seed) - torch.manual_seed(seed) + if seed 
is not None: + np.random.seed(seed) + torch.manual_seed(seed) self.model_config = model_config self.tra_config = tra_config diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index f707f7ff5..bd5d3dbd3 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -7,6 +7,7 @@ if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition from typing import List, Tuple, Union +import pandas as pd from ..model.base import BaseModel from ..data.dataset import DatasetH @@ -219,6 +220,8 @@ class ModelStrategy(BaseStrategy): self.model = model self.dataset = dataset self.pred_scores = convert_index_format(self.model.predict(dataset), level="datetime") + if isinstance(self.pred_scores, pd.DataFrame): + self.pred_scores = self.pred_scores.iloc[:, 0] def _update_model(self): """