From 724f9ba8d2382eaf7ca72762ba3030cd3120db9d Mon Sep 17 00:00:00 2001 From: Jactus Date: Sun, 9 May 2021 17:52:18 +0800 Subject: [PATCH 01/11] Update stale bot --- .github/{ => workflows}/stale.yml | 0 docs/component/recorder.rst | 1 + 2 files changed, 1 insertion(+) rename .github/{ => workflows}/stale.yml (100%) diff --git a/.github/stale.yml b/.github/workflows/stale.yml similarity index 100% rename from .github/stale.yml rename to .github/workflows/stale.yml diff --git a/docs/component/recorder.rst b/docs/component/recorder.rst index 3882161bc..cc425fa8e 100644 --- a/docs/component/recorder.rst +++ b/docs/component/recorder.rst @@ -34,6 +34,7 @@ Here is a general view of the structure of the system: - Recorder 2 - ... - ... + This experiment management system defines a set of interface and provided a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link `_). If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, pleaes refer to the related documents `here `_. From 143c257fa2ad9459dc4a04024926d5b71c2af9df Mon Sep 17 00:00:00 2001 From: Jactus Date: Sun, 9 May 2021 17:56:37 +0800 Subject: [PATCH 02/11] Update stale bot --- .github/workflows/stale.yml | 78 ++++++++++--------------------------- 1 file changed, 20 insertions(+), 58 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 3d025c987..02968dd09 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,62 +1,24 @@ -# Configuration for probot-stale - https://github.com/probot/stale +name: Mark stale issues and pull requests -# Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 60 +on: + schedule: + - cron: "0 0/3 * * *" -# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. -# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. -daysUntilClose: 7 +jobs: + stale: -# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) -onlyLabels: [] + runs-on: ubuntu-latest -# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable -exemptLabels: - - bug - - pinned - - security - - "[Status] Maybe Later" - -# Set to true to ignore issues in a project (defaults to false) -exemptProjects: false - -# Set to true to ignore issues in a milestone (defaults to false) -exemptMilestones: false - -# Set to true to ignore issues with an assignee (defaults to false) -exemptAssignees: false - -# Label to use when marking as stale -staleLabel: wontfix - -# Comment to post when marking as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you - for your contributions. - -# Comment to post when removing the stale label. -# unmarkComment: > -# Your comment here. - -# Comment to post when closing a stale Issue or Pull Request. -# closeComment: > -# Your comment here. - -# Limit the number of actions per hour, from 1-30. Default is 30 -limitPerRun: 30 - -# Limit to only `issues` or `pulls` -# only: issues - -# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': -# pulls: -# daysUntilStale: 30 -# markComment: > -# This pull request has been automatically marked as stale because it has not had -# recent activity. It will be closed if no further activity occurs. Thank you -# for your contributions. - -# issues: -# exemptLabels: -# - confirmed \ No newline at end of file + steps: + - uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue is stale because it has been open for three months with no activity. Remove the stale label or comment on the issue otherwise this will be closed in 5 days' + stale-pr-message: 'This PR is stale because it has been open for a year with no activity. Remove the stale label or comment on the PR otherwise this will be closed in 5 days' + stale-issue-label: 'stale' + stale-pr-label: 'stale' + days-before-stale: 90 + days-before-close: 5 + operations-per-run: 100 + exempt-issue-labels: 'bug' + remove-stale-when-updated: true \ No newline at end of file From 8ba5e93d040e31bd17eeb6ae8e2d409922bd0999 Mon Sep 17 00:00:00 2001 From: Jactus Date: Mon, 10 May 2021 12:33:58 +0800 Subject: [PATCH 03/11] Skip enhancement in stale bot --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 02968dd09..b07bdf1e7 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -20,5 +20,5 @@ jobs: days-before-stale: 90 days-before-close: 5 operations-per-run: 100 - exempt-issue-labels: 'bug' + exempt-issue-labels: 'bug,enhancement' remove-stale-when-updated: true \ No newline at end of file From 9bd77bd89f1636411383b9df9c4d36abe752c835 Mon Sep 17 00:00:00 2001 From: zhupr Date: Tue, 11 May 2021 17:36:55 +0800 Subject: [PATCH 04/11] Add configurable dataset to examples --- ..._config_lightgbm_configurable_dataset.yaml | 81 +++++++++++++++++++ qlib/data/dataset/handler.py | 6 +- 2 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml new file mode 100644 index 000000000..335dc2093 --- /dev/null +++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml @@ -0,0 +1,81 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + instruments: *market + data_loader: + class: QlibDataLoader + kwargs: + config: + feature: + - ["Resi($close, 15)/$close", "Std(Abs($close/Ref($close, 1)-1)*$volume, 5)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, 5)+1e-12)", "Rsquare($close, 5)", "($high-$low)/$open", "Rsquare($close, 10)", "Corr($close, Log($volume+1), 5)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)", "Corr($close, Log($volume+1), 10)", "Rsquare($close, 20)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)", "Corr($close, Log($volume+1), 20)", "(Less($open, $close)-$low)/$open"] + - ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"] + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + freq: day + + learn_processors: + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: label +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.data.dataset.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 201d2459d..a6ca658d1 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -58,7 +58,7 @@ class DataHandler(Serializable): instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, init_data=True, fetch_orig=True, ): @@ -71,7 +71,7 @@ class DataHandler(Serializable): start_time of the original data. end_time : end_time of the original data. - data_loader : Tuple[dict, str, DataLoader] + data_loader : Union[dict, str, DataLoader] data loader to load the data. init_data : intialize the original data in the constructor. @@ -282,7 +282,7 @@ class DataHandlerLP(DataHandler): instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, infer_processors=[], learn_processors=[], process_type=PTYPE_A, From b8e64dc526c125027c74c9273fc361f44bc74ac5 Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 12 May 2021 17:58:39 +0800 Subject: [PATCH 05/11] Modify set_global_logger_level use of contextmanager --- qlib/log.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/qlib/log.py b/qlib/log.py index a0e4f7986..26b62e3e8 100644 --- a/qlib/log.py +++ b/qlib/log.py @@ -165,8 +165,16 @@ class LogFilter(logging.Filter): return allow +@contextmanager def set_global_logger_level(level: int): + _handler_level_map = {} qlib_logger = logging.root.manager.loggerDict.get("qlib", None) if qlib_logger is not None: for _handler in qlib_logger.handlers: + _handler_level_map[_handler] = _handler.level _handler.level = level + try: + yield + finally: + for _handler, _level in _handler_level_map.items(): + _handler.level = _level From 76c5c5d1b6c4a7c6823f52eee85bcc4a4a582bf7 Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 12 May 2021 20:37:47 +0800 Subject: [PATCH 06/11] Add docstrings to set_global_logger_level --- qlib/log.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/qlib/log.py b/qlib/log.py index 26b62e3e8..379544392 100644 --- a/qlib/log.py +++ b/qlib/log.py @@ -165,14 +165,79 @@ class LogFilter(logging.Filter): return allow -@contextmanager -def set_global_logger_level(level: int): +def set_global_logger_level(level: int, return_orig_handler_level: bool = False): + """set qlib.xxx logger handlers level + + Parameters + ---------- + level: int + logger level + + return_orig_handler_level: bool + return origin handler level map + + Examples + --------- + + .. code-block:: python + + import qlib + import logging + from qlib.log import get_module_logger, set_global_logger_level + qlib.init() + + tmp_logger_01 = get_module_logger("tmp_logger_01", level=logging.INFO) + tmp_logger_01.info("1. tmp_logger_01 info show") + + global_level = logging.WARNING + 1 + set_global_logger_level(global_level) + tmp_logger_02 = get_module_logger("tmp_logger_02", level=logging.INFO) + tmp_logger_02.log(msg="2. tmp_logger_02 log show", level=global_level) + + tmp_logger_01.info("3. tmp_logger_01 info do not show") + + """ _handler_level_map = {} qlib_logger = logging.root.manager.loggerDict.get("qlib", None) if qlib_logger is not None: for _handler in qlib_logger.handlers: _handler_level_map[_handler] = _handler.level _handler.level = level + return _handler_level_map if return_orig_handler_level else None + + +@contextmanager +def set_global_logger_level_cm(level: int): + """set qlib.xxx logger handlers level to use contextmanager + + Parameters + ---------- + level: int + logger level + + Examples + --------- + + .. code-block:: python + + import qlib + import logging + from qlib.log import get_module_logger, set_global_logger_level_cm + qlib.init() + + tmp_logger_01 = get_module_logger("tmp_logger_01", level=logging.INFO) + tmp_logger_01.info("1. tmp_logger_01 info show") + + global_level = logging.WARNING + 1 + with set_global_logger_level_cm(global_level): + tmp_logger_02 = get_module_logger("tmp_logger_02", level=logging.INFO) + tmp_logger_02.log(msg="2. tmp_logger_02 log show", level=global_level) + tmp_logger_01.info("3. tmp_logger_01 info do not show") + + tmp_logger_01.info("4. tmp_logger_01 info show") + + """ + _handler_level_map = set_global_logger_level(level, return_orig_handler_level=True) try: yield finally: From bd37f5d953d18a8c47304606f9d414403c96bda1 Mon Sep 17 00:00:00 2001 From: Jactus Date: Thu, 13 May 2021 14:21:54 +0800 Subject: [PATCH 07/11] Fix bug and update doc --- docs/component/data.rst | 3 +-- qlib/contrib/backtest/position.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/component/data.rst b/docs/component/data.rst index 3cee803e6..0a650c523 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -396,8 +396,7 @@ The ``DatasetH`` class is the `dataset` with `Data Handler`. Here is the most im API --------- -To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#module-qlib.data.dataset.__init__>`_. - +To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#dataset>`_. Cache diff --git a/qlib/contrib/backtest/position.py b/qlib/contrib/backtest/position.py index 97abc2a56..09313f933 100644 --- a/qlib/contrib/backtest/position.py +++ b/qlib/contrib/backtest/position.py @@ -1,10 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. - -import pandas as pd import copy import pathlib +import pandas as pd +import numpy as np from .order import Order """ From f51e04a1cc476bc51bbd6396a3df25951a918893 Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Thu, 13 May 2021 23:12:29 +0800 Subject: [PATCH 08/11] LightGBM hyperparameter --- examples/hyperparameter/LightGBM/Readme.md | 23 ++++++ .../LightGBM/hyperparameter_158.py | 74 +++++++++++++++++++ .../LightGBM/hyperparameter_360.py | 74 +++++++++++++++++++ .../hyperparameter/LightGBM/requirements.txt | 5 ++ 4 files changed, 176 insertions(+) create mode 100644 examples/hyperparameter/LightGBM/Readme.md create mode 100644 examples/hyperparameter/LightGBM/hyperparameter_158.py create mode 100644 examples/hyperparameter/LightGBM/hyperparameter_360.py create mode 100644 examples/hyperparameter/LightGBM/requirements.txt diff --git a/examples/hyperparameter/LightGBM/Readme.md b/examples/hyperparameter/LightGBM/Readme.md new file mode 100644 index 000000000..320e13828 --- /dev/null +++ b/examples/hyperparameter/LightGBM/Readme.md @@ -0,0 +1,23 @@ +# LightGBM hyperparameter + +## Alpha158 +First terminal +``` +optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_158.py +``` + +## Alpha360 +First terminal +``` +optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_360.py +``` diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py new file mode 100644 index 000000000..dea00d383 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -0,0 +1,74 @@ +import qlib +from qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region='cn') +qlib.init(provider_uri=provider_uri, region='cn') + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + 'start_time': '2008-01-01', + 'end_time': '2020-08-01', + 'fit_start_time': '2008-01-01', + 'fit_end_time': '2014-12-31', + 'instruments': market +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + 'train': ('2008-01-01', '2014-12-31'), + 'valid': ('2015-01-01', '2016-12-31'), + 'test': ('2017-01-01', '2020-08-01'), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), + "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), + "subsample": trial.suggest_uniform('subsample', 0, 1), + "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "max_depth": 10, + "num_leaves": trial.suggest_int('num_leaves', 1, 1024), + 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), + 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), + 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), + 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), + 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + }, + }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + + model.fit(dataset, evals_result=evals_result) + return min(evals_result['valid']) + +study = optuna.Study(study_name='LGBM_158', storage='sqlite:///db.sqlite3') +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py new file mode 100644 index 000000000..eef2966c2 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -0,0 +1,74 @@ +import qlib +from qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region='cn') +qlib.init(provider_uri=provider_uri, region='cn') + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + 'start_time': '2008-01-01', + 'end_time': '2020-08-01', + 'fit_start_time': '2008-01-01', + 'fit_end_time': '2014-12-31', + 'instruments': market +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha360", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + 'train': ('2008-01-01', '2014-12-31'), + 'valid': ('2015-01-01', '2016-12-31'), + 'test': ('2017-01-01', '2020-08-01'), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), + "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), + "subsample": trial.suggest_uniform('subsample', 0, 1), + "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "max_depth": 10, + "num_leaves": trial.suggest_int('num_leaves', 1, 1024), + 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), + 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), + 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), + 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), + 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + }, + }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + + model.fit(dataset, evals_result=evals_result) + return min(evals_result['valid']) + +study = optuna.Study(study_name='LGBM_360', storage='sqlite:///db.sqlite3') +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/requirements.txt b/examples/hyperparameter/LightGBM/requirements.txt new file mode 100644 index 000000000..c8b16cefe --- /dev/null +++ b/examples/hyperparameter/LightGBM/requirements.txt @@ -0,0 +1,5 @@ +pandas==1.1.2 +numpy==1.17.4 +lightgbm==3.1.0 +optuna==2.7.0 +optuna-dashboard==0.4.1 From 8f67010b5838e13bfc27c28b7e9566567d0f52ad Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Mon, 17 May 2021 23:09:42 +0800 Subject: [PATCH 09/11] Fix CI lint with black --- .../LightGBM/hyperparameter_158.py | 50 +++++++++---------- .../LightGBM/hyperparameter_360.py | 50 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py index dea00d383..93c70596c 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_158.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -8,18 +8,18 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region='cn') -qlib.init(provider_uri=provider_uri, region='cn') + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") market = "csi300" benchmark = "SH000300" data_handler_config = { - 'start_time': '2008-01-01', - 'end_time': '2020-08-01', - 'fit_start_time': '2008-01-01', - 'fit_end_time': '2014-12-31', - 'instruments': market + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market } dataset_task = { "dataset": { @@ -32,15 +32,16 @@ dataset_task = { "kwargs": data_handler_config, }, "segments": { - 'train': ('2008-01-01', '2014-12-31'), - 'valid': ('2015-01-01', '2016-12-31'), - 'test': ('2017-01-01', '2020-08-01'), + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), }, }, }, } dataset = init_instance_by_config(dataset_task["dataset"]) + def objective(trial): task = { "model": { @@ -48,27 +49,26 @@ def objective(trial): "module_path": "qlib.contrib.model.gbdt", "kwargs": { "loss": "mse", - "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), - "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), - "subsample": trial.suggest_uniform('subsample', 0, 1), - "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), "max_depth": 10, - "num_leaves": trial.suggest_int('num_leaves', 1, 1024), - 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), - 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), - 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), - 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), - 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), }, - }, + }, } evals_result = dict() model = init_instance_by_config(task["model"]) - model.fit(dataset, evals_result=evals_result) - return min(evals_result['valid']) + return min(evals_result["valid"]) -study = optuna.Study(study_name='LGBM_158', storage='sqlite:///db.sqlite3') +study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py index eef2966c2..3b72355a6 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_360.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -8,18 +8,18 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region='cn') -qlib.init(provider_uri=provider_uri, region='cn') + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") market = "csi300" benchmark = "SH000300" data_handler_config = { - 'start_time': '2008-01-01', - 'end_time': '2020-08-01', - 'fit_start_time': '2008-01-01', - 'fit_end_time': '2014-12-31', - 'instruments': market + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market } dataset_task = { "dataset": { @@ -32,15 +32,16 @@ dataset_task = { "kwargs": data_handler_config, }, "segments": { - 'train': ('2008-01-01', '2014-12-31'), - 'valid': ('2015-01-01', '2016-12-31'), - 'test': ('2017-01-01', '2020-08-01'), + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), }, }, }, } dataset = init_instance_by_config(dataset_task["dataset"]) + def objective(trial): task = { "model": { @@ -48,27 +49,26 @@ def objective(trial): "module_path": "qlib.contrib.model.gbdt", "kwargs": { "loss": "mse", - "colsample_bytree": trial.suggest_uniform('colsample_bytree', 0.5, 1), - "learning_rate": trial.suggest_uniform('learning_rate', 0, 1), - "subsample": trial.suggest_uniform('subsample', 0, 1), - "lambda_l1": trial.suggest_loguniform('lambda_l1', 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform('lambda_l2', 1e-8, 1e+4), + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), "max_depth": 10, - "num_leaves": trial.suggest_int('num_leaves', 1, 1024), - 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0), - 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0), - 'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), - 'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50), - 'min_child_samples': trial.suggest_int('min_child_samples', 5, 100), + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), }, - }, + }, } evals_result = dict() model = init_instance_by_config(task["model"]) - model.fit(dataset, evals_result=evals_result) - return min(evals_result['valid']) + return min(evals_result["valid"]) -study = optuna.Study(study_name='LGBM_360', storage='sqlite:///db.sqlite3') +study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) From 41ab130807c1f91dc6818a63f75e52bf7dca4ba5 Mon Sep 17 00:00:00 2001 From: Kenneth Tang Date: Tue, 18 May 2021 00:01:45 +0800 Subject: [PATCH 10/11] Fix CI lint with black --- examples/hyperparameter/LightGBM/hyperparameter_158.py | 8 +++++--- examples/hyperparameter/LightGBM/hyperparameter_360.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py index 93c70596c..5e4887a14 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_158.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -8,6 +8,7 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region="cn") qlib.init(provider_uri=provider_uri, region="cn") @@ -19,7 +20,7 @@ data_handler_config = { "end_time": "2020-08-01", "fit_start_time": "2008-01-01", "fit_end_time": "2014-12-31", - "instruments": market + "instruments": market, } dataset_task = { "dataset": { @@ -52,8 +53,8 @@ def objective(trial): "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), "subsample": trial.suggest_uniform("subsample", 0, 1), - "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), "max_depth": 10, "num_leaves": trial.suggest_int("num_leaves", 1, 1024), "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), @@ -70,5 +71,6 @@ def objective(trial): model.fit(dataset, evals_result=evals_result) return min(evals_result["valid"]) + study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py index 3b72355a6..8b498e912 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_360.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -8,6 +8,7 @@ if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(scripts_dir)) from get_data import GetData + GetData().qlib_data(target_dir=provider_uri, region="cn") qlib.init(provider_uri=provider_uri, region="cn") @@ -19,7 +20,7 @@ data_handler_config = { "end_time": "2020-08-01", "fit_start_time": "2008-01-01", "fit_end_time": "2014-12-31", - "instruments": market + "instruments": market, } dataset_task = { "dataset": { @@ -52,8 +53,8 @@ def objective(trial): "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), "subsample": trial.suggest_uniform("subsample", 0, 1), - "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e+4), - "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e+4), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), "max_depth": 10, "num_leaves": trial.suggest_int("num_leaves", 1, 1024), "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), @@ -70,5 +71,6 @@ def objective(trial): model.fit(dataset, evals_result=evals_result) return min(evals_result["valid"]) + study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") study.optimize(objective, n_jobs=6) From 19eda8f4f0d0e4931efcc9566256edc2076a5a25 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Mon, 17 May 2021 17:45:31 +0800 Subject: [PATCH 11/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 84f17ccda..a14ab5c31 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,6 @@ New features under development(order by estimated release time). Your feedbacks about the features are very important. | Feature | Status | | -- | ------ | -| Online serving and automatic model rolling | Under review: https://github.com/microsoft/qlib/pull/290 | | Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 | | Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 | | Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 | @@ -55,6 +54,7 @@ Your feedbacks about the features are very important. Recent released features | Feature | Status | | -- | ------ | +| Online serving and automatic model rolling | Released: https://github.com/microsoft/qlib/pull/290 | | DoubleEnsemble Model | Released https://github.com/microsoft/qlib/pull/286 | | High-frequency data processing example | Released https://github.com/microsoft/qlib/pull/257 | | High-frequency trading example | Part of code released https://github.com/microsoft/qlib/pull/227 |