From 21c0dae03c4161427904875fa3e5509b0e950b12 Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 19 Nov 2020 16:51:09 +0800 Subject: [PATCH] Init benchmarks --- examples/benchmarks/CatBoost/requirements.txt | 3 + examples/benchmarks/DNN/requirements.txt | 4 ++ .../benchmarks/DNN/workflow_config_dnn.yaml | 62 ++++++++++++++++++ examples/benchmarks/GATs/requirements.txt | 4 ++ .../benchmarks/GATs/worflow_config_gats.yaml | 63 ++++++++++++++++++ examples/benchmarks/GBDT/requirements.txt | 3 + .../benchmarks/GBDT/workflow_config_gbdt.yaml | 59 +++++++++++++++++ examples/benchmarks/GRU/requirements.txt | 4 ++ .../benchmarks/GRU/workflow_config_gru.yaml | 62 ++++++++++++++++++ examples/benchmarks/LSTM/requirements.txt | 4 ++ .../benchmarks/LSTM/workflow_config_lstm.yaml | 62 ++++++++++++++++++ examples/benchmarks/XGBoost/requirements.txt | 3 + .../XGBoost/workflow_config_xgboost.yaml | 62 ++++++++++++++++++ examples/benchmarks/XGBoost/xgboost.py | 64 +++++++++++++++++++ 14 files changed, 459 insertions(+) create mode 100644 examples/benchmarks/CatBoost/requirements.txt create mode 100644 examples/benchmarks/DNN/requirements.txt create mode 100644 examples/benchmarks/DNN/workflow_config_dnn.yaml create mode 100644 examples/benchmarks/GATs/requirements.txt create mode 100644 examples/benchmarks/GATs/worflow_config_gats.yaml create mode 100644 examples/benchmarks/GBDT/requirements.txt create mode 100644 examples/benchmarks/GBDT/workflow_config_gbdt.yaml create mode 100644 examples/benchmarks/GRU/requirements.txt create mode 100644 examples/benchmarks/GRU/workflow_config_gru.yaml create mode 100644 examples/benchmarks/LSTM/requirements.txt create mode 100644 examples/benchmarks/LSTM/workflow_config_lstm.yaml create mode 100644 examples/benchmarks/XGBoost/requirements.txt create mode 100644 examples/benchmarks/XGBoost/workflow_config_xgboost.yaml create mode 100755 examples/benchmarks/XGBoost/xgboost.py diff --git a/examples/benchmarks/CatBoost/requirements.txt 
b/examples/benchmarks/CatBoost/requirements.txt new file mode 100644 index 000000000..507a65944 --- /dev/null +++ b/examples/benchmarks/CatBoost/requirements.txt @@ -0,0 +1,3 @@ +pandas==1.1.2 +numpy==1.17.4 +catboost==0.24.3 diff --git a/examples/benchmarks/DNN/requirements.txt b/examples/benchmarks/DNN/requirements.txt new file mode 100644 index 000000000..16de0a438 --- /dev/null +++ b/examples/benchmarks/DNN/requirements.txt @@ -0,0 +1,4 @@ +pandas==1.1.2 +numpy==1.17.4 +scikit_learn==0.23.2 +torch==1.7.0 diff --git a/examples/benchmarks/DNN/workflow_config_dnn.yaml b/examples/benchmarks/DNN/workflow_config_dnn.yaml new file mode 100644 index 000000000..0f50cbb25 --- /dev/null +++ b/examples/benchmarks/DNN/workflow_config_dnn.yaml @@ -0,0 +1,62 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: DNNModelPytorch + module_path: qlib.contrib.model.pytorch_nn + kwargs: + input_dim: 360 + output_dim: 1 + layers: [256, 512, 1024, 512, 256, 128, 64] + lr: 0.001 + max_steps: 300 + batch_size: 2000 + early_stop_rounds: 50 + eval_steps: 20 + lr_decay: 0.96 + lr_decay_steps: 100 + optimizer: gd + loss: mse + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: ALPHA360_Denoise + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + 
record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/benchmarks/GATs/requirements.txt b/examples/benchmarks/GATs/requirements.txt new file mode 100644 index 000000000..16de0a438 --- /dev/null +++ b/examples/benchmarks/GATs/requirements.txt @@ -0,0 +1,4 @@ +pandas==1.1.2 +numpy==1.17.4 +scikit_learn==0.23.2 +torch==1.7.0 diff --git a/examples/benchmarks/GATs/worflow_config_gats.yaml b/examples/benchmarks/GATs/worflow_config_gats.yaml new file mode 100644 index 000000000..6c8db2e77 --- /dev/null +++ b/examples/benchmarks/GATs/worflow_config_gats.yaml @@ -0,0 +1,63 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: GAT + module_path: qlib.contrib.model.pytorch_gats + kwargs: + d_feat: 6 + hidden_size: 64 + num_layers: 2 + dropout: 0.0 + n_epochs: 200 + lr: 1e-3 + early_stop: 20 + batch_size: 800 + metric: IC + loss: mse + base_model: GRU + seed: 0 + GPU: 0 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: ALPHA360_Denoise + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: 
qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/benchmarks/GBDT/requirements.txt b/examples/benchmarks/GBDT/requirements.txt new file mode 100644 index 000000000..507d2d453 --- /dev/null +++ b/examples/benchmarks/GBDT/requirements.txt @@ -0,0 +1,3 @@ +pandas==1.1.2 +numpy==1.17.4 +lightgbm==3.1.0 diff --git a/examples/benchmarks/GBDT/workflow_config_gbdt.yaml b/examples/benchmarks/GBDT/workflow_config_gbdt.yaml new file mode 100644 index 000000000..212558044 --- /dev/null +++ b/examples/benchmarks/GBDT/workflow_config_gbdt.yaml @@ -0,0 +1,59 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.0421 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: 
qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/benchmarks/GRU/requirements.txt b/examples/benchmarks/GRU/requirements.txt new file mode 100644 index 000000000..1fc2779c0 --- /dev/null +++ b/examples/benchmarks/GRU/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.17.4 +pandas==1.1.2 +scikit_learn==0.23.2 +torch==1.7.0 diff --git a/examples/benchmarks/GRU/workflow_config_gru.yaml b/examples/benchmarks/GRU/workflow_config_gru.yaml new file mode 100644 index 000000000..49b6159dc --- /dev/null +++ b/examples/benchmarks/GRU/workflow_config_gru.yaml @@ -0,0 +1,62 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: GRU + module_path: qlib.contrib.model.pytorch_gru + kwargs: + d_feat: 6 + hidden_size: 64 + num_layers: 2 + dropout: 0.0 + n_epochs: 200 + lr: 1e-3 + early_stop: 20 + batch_size: 800 + metric: IC + loss: mse + seed: 0 + GPU: 0 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: ALPHA360_Denoise + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at 
end of file diff --git a/examples/benchmarks/LSTM/requirements.txt b/examples/benchmarks/LSTM/requirements.txt new file mode 100644 index 000000000..1fc2779c0 --- /dev/null +++ b/examples/benchmarks/LSTM/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.17.4 +pandas==1.1.2 +scikit_learn==0.23.2 +torch==1.7.0 diff --git a/examples/benchmarks/LSTM/workflow_config_lstm.yaml b/examples/benchmarks/LSTM/workflow_config_lstm.yaml new file mode 100644 index 000000000..1e3b309d2 --- /dev/null +++ b/examples/benchmarks/LSTM/workflow_config_lstm.yaml @@ -0,0 +1,62 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LSTM + module_path: qlib.contrib.model.pytorch_lstm + kwargs: + d_feat: 6 + hidden_size: 64 + num_layers: 2 + dropout: 0.0 + n_epochs: 200 + lr: 1e-3 + early_stop: 20 + batch_size: 800 + metric: IC + loss: mse + seed: 0 + GPU: 0 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: ALPHA360_Denoise + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/benchmarks/XGBoost/requirements.txt 
b/examples/benchmarks/XGBoost/requirements.txt new file mode 100644 index 000000000..077f343e5 --- /dev/null +++ b/examples/benchmarks/XGBoost/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.17.4 +pandas==1.1.2 +xgboost==1.2.1 \ No newline at end of file diff --git a/examples/benchmarks/XGBoost/workflow_config_xgboost.yaml b/examples/benchmarks/XGBoost/workflow_config_xgboost.yaml new file mode 100644 index 000000000..497ffa5b6 --- /dev/null +++ b/examples/benchmarks/XGBoost/workflow_config_xgboost.yaml @@ -0,0 +1,62 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: XGBModel + module_path: qlib.contrib.model.xgboost + kwargs: + objective: reg:linear + n_estimators: 5000 + colsample_bytree: 0.85 + learning_rate: 0.0421 + subsample: 0.8789 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + missing: -1 + min_child_weight: 1 + nthread: 4 + tree_method: hist + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git 
# File: examples/benchmarks/XGBoost/xgboost.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import numpy as np
import pandas as pd
import xgboost as xgb

from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP


class XGBModel(Model):
    """Gradient-boosted tree model backed by ``xgboost.train``.

    The booster parameters are assembled in ``__init__`` and the trained
    ``xgboost.Booster`` is stored on ``self.model`` by :meth:`fit`.
    """

    # Maps the short objective names accepted by ``__init__`` onto the
    # objective strings XGBoost itself understands.
    _OBJECTIVES = {"mse": "reg:squarederror", "binary": "binary:logistic"}

    def __init__(self, obj="mse", **kwargs):
        """Store the booster parameters.

        Parameters
        ----------
        obj : str
            Short objective name, either ``"mse"`` or ``"binary"``.
        **kwargs
            Extra booster parameters forwarded to ``xgboost.train``. They are
            applied after ``obj``, so an explicit ``objective`` entry wins.

        Raises
        ------
        NotImplementedError
            If ``obj`` is not one of the supported names.
        """
        if obj not in self._OBJECTIVES:
            raise NotImplementedError(
                "unsupported obj {!r}; expected one of {}".format(obj, sorted(self._OBJECTIVES))
            )
        # XGBoost reads the training objective from the "objective" key; an
        # "obj" key in the parameter dict is not a booster parameter and
        # would be ignored.
        self._params = {"objective": self._OBJECTIVES[obj]}
        self._params.update(kwargs)
        self.model = None

    def fit(
        self,
        dataset: DatasetH,
        num_boost_round=1000,
        early_stopping_rounds=50,
        verbose_eval=20,
        evals_result=None,
        **kwargs
    ):
        """Train the booster on the "train" segment, validating on "valid".

        Parameters
        ----------
        dataset : DatasetH
            Source of the "train" and "valid" segments; each must provide
            "feature" and "label" column sets.
        num_boost_round : int
            Maximum number of boosting rounds.
        early_stopping_rounds : int
            Passed through to ``xgboost.train``.
        verbose_eval : int or bool
            Passed through to ``xgboost.train``.
        evals_result : dict, optional
            Filled in place with the evaluation history. After training,
            ``evals_result["train"]`` and ``evals_result["valid"]`` each hold
            the history of the first reported metric. A fresh dict is used
            when omitted, so separate calls never share state.
        **kwargs
            Extra keyword arguments forwarded to ``xgboost.train``.

        Raises
        ------
        ValueError
            If the training label is not a single column.
        """
        if evals_result is None:
            evals_result = {}

        # NOTE(review): DK_L presumably selects the "learn" view of the
        # processed data -- confirm against DataHandlerLP.
        df_train, df_valid = dataset.prepare(
            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
        )
        x_train, y_train = df_train["feature"], df_train["label"]
        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        # XGBoost needs a 1-D array as its label. reshape(-1) is used rather
        # than np.squeeze so that a single-row segment stays 1-D instead of
        # collapsing to a 0-d array.
        if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
            y_train_1d = y_train.values.reshape(-1)
            y_valid_1d = y_valid.values.reshape(-1)
        else:
            raise ValueError("XGBoost doesn't support multi-label training")

        dtrain = xgb.DMatrix(x_train.values, label=y_train_1d)
        dvalid = xgb.DMatrix(x_valid.values, label=y_valid_1d)
        self.model = xgb.train(
            self._params,
            dtrain=dtrain,
            num_boost_round=num_boost_round,
            evals=[(dtrain, "train"), (dvalid, "valid")],
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            evals_result=evals_result,
            **kwargs
        )
        # xgboost reports {"<eval name>": {"<metric>": [values...]}}; keep only
        # the first metric's history for each evaluation set.
        evals_result["train"] = next(iter(evals_result["train"].values()))
        evals_result["valid"] = next(iter(evals_result["valid"].values()))

    def predict(self, dataset):
        """Predict on the "test" segment of ``dataset``.

        Returns
        -------
        pandas.Series
            Predictions indexed like the test feature frame.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self.model is None:
            raise ValueError("model is not fitted yet!")
        x_test = dataset.prepare("test", col_set="feature")
        # Pass the 2-D feature matrix unchanged: squeezing it would turn a
        # single-row or single-column frame into a 1-D array.
        return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)