From f51e04a1cc476bc51bbd6396a3df25951a918893 Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Thu, 13 May 2021 23:12:29 +0800 Subject: [PATCH 1/3] LightGBM hyperparameter --- examples/hyperparameter/LightGBM/Readme.md | 23 ++++++ .../LightGBM/hyperparameter_158.py | 74 +++++++++++++++++++ .../LightGBM/hyperparameter_360.py | 74 +++++++++++++++++++ .../hyperparameter/LightGBM/requirements.txt | 5 ++ 4 files changed, 176 insertions(+) create mode 100644 examples/hyperparameter/LightGBM/Readme.md create mode 100644 examples/hyperparameter/LightGBM/hyperparameter_158.py create mode 100644 examples/hyperparameter/LightGBM/hyperparameter_360.py create mode 100644 examples/hyperparameter/LightGBM/requirements.txt diff --git a/examples/hyperparameter/LightGBM/Readme.md b/examples/hyperparameter/LightGBM/Readme.md new file mode 100644 index 000000000..320e13828 --- /dev/null +++ b/examples/hyperparameter/LightGBM/Readme.md @@ -0,0 +1,23 @@ +# LightGBM hyperparameter + +## Alpha158 +First terminal +``` +optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_158.py +``` + +## Alpha360 +First terminal +``` +optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_360.py +``` diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py new file mode 100644 index 000000000..dea00d383 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -0,0 +1,74 @@ +import qlib +from qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region='cn') +qlib.init(provider_uri=provider_uri, region='cn') + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + 'start_time': '2008-01-01', + 'end_time': '2020-08-01', + 'fit_start_time': '2008-01-01', + 'fit_end_time': '2014-12-31', + 'instruments': market +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + 'train': ('2008-01-01', '2014-12-31'), + 'valid': ('2015-01-01', '2016-12-31'), + 'test': ('2017-01-01', '2020-08-01'), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), + "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), + "subsample": trial.suggest_uniform('subsample', 0, 1), + "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "max_depth": 10, + "num_leaves": trial.suggest_int('num_leaves', 1, 1024), + 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), + 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), + 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), + 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), + 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + }, + }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + + model.fit(dataset, evals_result=evals_result) + return min(evals_result['valid']) + +study = optuna.Study(study_name='LGBM_158', storage='sqlite:///db.sqlite3') +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py new file mode 100644 index 000000000..eef2966c2 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -0,0 +1,74 @@ +import qlib +from qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region='cn') +qlib.init(provider_uri=provider_uri, region='cn') + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + 'start_time': '2008-01-01', + 'end_time': '2020-08-01', + 'fit_start_time': '2008-01-01', + 'fit_end_time': '2014-12-31', + 'instruments': market +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha360", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + 'train': ('2008-01-01', '2014-12-31'), + 'valid': ('2015-01-01', '2016-12-31'), + 'test': ('2017-01-01', '2020-08-01'), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), + "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), + "subsample": trial.suggest_uniform('subsample', 0, 1), + "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "max_depth": 10, + "num_leaves": trial.suggest_int('num_leaves', 1, 1024), + 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), + 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), + 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), + 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), + 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + }, + }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + + model.fit(dataset, evals_result=evals_result) + return min(evals_result['valid']) + +study = optuna.Study(study_name='LGBM_360', storage='sqlite:///db.sqlite3') +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/requirements.txt b/examples/hyperparameter/LightGBM/requirements.txt new file mode 100644 index 000000000..c8b16cefe --- /dev/null +++ b/examples/hyperparameter/LightGBM/requirements.txt @@ -0,0 +1,5 @@ +pandas==1.1.2 +numpy==1.17.4 +lightgbm==3.1.0 +optuna==2.7.0 +optuna-dashboard==0.4.1 From 8f67010b5838e13bfc27c28b7e9566567d0f52ad Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Mon, 17 May 2021 23:09:42 +0800 Subject: [PATCH 2/3] Fix CI lint with black --- .../LightGBM/hyperparameter_158.py | 50 +++++++++---------- .../LightGBM/hyperparameter_360.py | 50 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py index dea00d383..93c70596c 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_158.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -8,18 +8,18 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region='cn') -qlib.init(provider_uri=provider_uri, region='cn') + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") market = "csi300" benchmark = "SH000300" data_handler_config = { - 'start_time': '2008-01-01', - 'end_time': '2020-08-01', - 'fit_start_time': '2008-01-01', - 'fit_end_time': '2014-12-31', - 'instruments': market + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market } dataset_task = { "dataset": { @@ -32,15 +32,16 @@ dataset_task = { "kwargs": data_handler_config, }, "segments": { - 'train': ('2008-01-01', '2014-12-31'), - 'valid': ('2015-01-01', '2016-12-31'), - 'test': ('2017-01-01', '2020-08-01'), + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), }, }, }, } dataset = init_instance_by_config(dataset_task["dataset"]) + def objective(trial): task = { "model": { @@ -48,27 +49,26 @@ def objective(trial): "module_path": "qlib.contrib.model.gbdt", "kwargs": { "loss": "mse", - "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), - "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), - "subsample": trial.suggest_uniform('subsample', 0, 1), - "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), "max_depth": 10, - "num_leaves": trial.suggest_int('num_leaves', 1, 1024), - 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), - 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), - 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), - 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), - 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), }, - }, + }, } evals_result = dict() model = init_instance_by_config(task["model"]) - model.fit(dataset, evals_result=evals_result) - return min(evals_result['valid']) + return min(evals_result["valid"]) -study = optuna.Study(study_name='LGBM_158', storage='sqlite:///db.sqlite3') +study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py index eef2966c2..3b72355a6 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_360.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -8,18 +8,18 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region='cn') -qlib.init(provider_uri=provider_uri, region='cn') + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") market = "csi300" benchmark = "SH000300" data_handler_config = { - 'start_time': '2008-01-01', - 'end_time': '2020-08-01', - 'fit_start_time': '2008-01-01', - 'fit_end_time': '2014-12-31', - 'instruments': market + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market } dataset_task = { "dataset": { @@ -32,15 +32,16 @@ dataset_task = { "kwargs": data_handler_config, }, "segments": { - 'train': ('2008-01-01', '2014-12-31'), - 'valid': ('2015-01-01', '2016-12-31'), - 'test': ('2017-01-01', '2020-08-01'), + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), }, }, }, } dataset = init_instance_by_config(dataset_task["dataset"]) + def objective(trial): task = { "model": { @@ -48,27 +49,26 @@ def objective(trial): "module_path": "qlib.contrib.model.gbdt", "kwargs": { "loss": "mse", - "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), - "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), - "subsample": trial.suggest_uniform('subsample', 0, 1), - "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), "max_depth": 10, - "num_leaves": trial.suggest_int('num_leaves', 1, 1024), - 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), - 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), - 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), - 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), - 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), }, - }, + }, } evals_result = dict() model = init_instance_by_config(task["model"]) - model.fit(dataset, evals_result=evals_result) - return min(evals_result['valid']) + return min(evals_result["valid"]) -study = optuna.Study(study_name='LGBM_360', storage='sqlite:///db.sqlite3') +study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) From 41ab130807c1f91dc6818a63f75e52bf7dca4ba5 Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Tue, 18 May 2021 00:01:45 +0800 Subject: [PATCH 3/3] Fix CI lint with black --- examples/hyperparameter/LightGBM/hyperparameter_158.py | 8 +++++--- examples/hyperparameter/LightGBM/hyperparameter_360.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py index 93c70596c..5e4887a14 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_158.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -8,6 +8,7 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region="cn") qlib.init(provider_uri=provider_uri, region="cn") @@ -19,7 +20,7 @@ data_handler_config = { "end_time": "2020-08-01", "fit_start_time": "2008-01-01", "fit_end_time": "2014-12-31", - "instruments": market + "instruments": market, } dataset_task = { "dataset": { @@ -52,8 +53,8 @@ def objective(trial): "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), "subsample": trial.suggest_uniform("subsample", 0, 1), - "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), "max_depth": 10, "num_leaves": trial.suggest_int("num_leaves", 1, 1024), "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), @@ -70,5 +71,6 @@ def objective(trial): model.fit(dataset, evals_result=evals_result) return min(evals_result["valid"]) + study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py index 3b72355a6..8b498e912 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_360.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -8,6 +8,7 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region="cn") qlib.init(provider_uri=provider_uri, region="cn") @@ -19,7 +20,7 @@ data_handler_config = { "end_time": "2020-08-01", "fit_start_time": "2008-01-01", "fit_end_time": "2014-12-31", - "instruments": market + "instruments": market, } dataset_task = { "dataset": { @@ -52,8 +53,8 @@ def objective(trial): "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), "subsample": trial.suggest_uniform("subsample", 0, 1), - "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), "max_depth": 10, "num_leaves": trial.suggest_int("num_leaves", 1, 1024), "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), @@ -70,5 +71,6 @@ def objective(trial): model.fit(dataset, evals_result=evals_result) return min(evals_result["valid"]) + study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6)