diff --git a/.github/stale.yml b/.github/stale.yml deleted file mode 100644 index 3d025c987..000000000 --- a/.github/stale.yml +++ /dev/null @@ -1,62 +0,0 @@ -# Configuration for probot-stale - https://github.com/probot/stale - -# Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 60 - -# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. -# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. -daysUntilClose: 7 - -# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) -onlyLabels: [] - -# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable -exemptLabels: - - bug - - pinned - - security - - "[Status] Maybe Later" - -# Set to true to ignore issues in a project (defaults to false) -exemptProjects: false - -# Set to true to ignore issues in a milestone (defaults to false) -exemptMilestones: false - -# Set to true to ignore issues with an assignee (defaults to false) -exemptAssignees: false - -# Label to use when marking as stale -staleLabel: wontfix - -# Comment to post when marking as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you - for your contributions. - -# Comment to post when removing the stale label. -# unmarkComment: > -# Your comment here. - -# Comment to post when closing a stale Issue or Pull Request. -# closeComment: > -# Your comment here. - -# Limit the number of actions per hour, from 1-30. 
Default is 30 -limitPerRun: 30 - -# Limit to only `issues` or `pulls` -# only: issues - -# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': -# pulls: -# daysUntilStale: 30 -# markComment: > -# This pull request has been automatically marked as stale because it has not had -# recent activity. It will be closed if no further activity occurs. Thank you -# for your contributions. - -# issues: -# exemptLabels: -# - confirmed \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 000000000..b07bdf1e7 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,24 @@ +name: Mark stale issues and pull requests + +on: + schedule: + - cron: "0 0/3 * * *" + +jobs: + stale: + + runs-on: ubuntu-latest + + steps: + - uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue is stale because it has been open for three months with no activity. Remove the stale label or comment on the issue otherwise this will be closed in 5 days' + stale-pr-message: 'This PR is stale because it has been open for three months with no activity. Remove the stale label or comment on the PR otherwise this will be closed in 5 days' + stale-issue-label: 'stale' + stale-pr-label: 'stale' + days-before-stale: 90 + days-before-close: 5 + operations-per-run: 100 + exempt-issue-labels: 'bug,enhancement' + remove-stale-when-updated: true \ No newline at end of file diff --git a/README.md b/README.md index 84f17ccda..a14ab5c31 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,6 @@ New features under development(order by estimated release time). Your feedbacks about the features are very important. 
| Feature | Status | | -- | ------ | -| Online serving and automatic model rolling | Under review: https://github.com/microsoft/qlib/pull/290 | | Planning-based portfolio optimization | Under review: https://github.com/microsoft/qlib/pull/280 | | Fund data supporting and analysis | Under review: https://github.com/microsoft/qlib/pull/292 | | Point-in-Time database | Under review: https://github.com/microsoft/qlib/pull/343 | @@ -55,6 +54,7 @@ Your feedbacks about the features are very important. Recent released features | Feature | Status | | -- | ------ | +| Online serving and automatic model rolling | Released: https://github.com/microsoft/qlib/pull/290 | | DoubleEnsemble Model | Released https://github.com/microsoft/qlib/pull/286 | | High-frequency data processing example | Released https://github.com/microsoft/qlib/pull/257 | | High-frequency trading example | Part of code released https://github.com/microsoft/qlib/pull/227 | diff --git a/docs/component/data.rst b/docs/component/data.rst index 3cee803e6..0a650c523 100644 --- a/docs/component/data.rst +++ b/docs/component/data.rst @@ -396,8 +396,7 @@ The ``DatasetH`` class is the `dataset` with `Data Handler`. Here is the most im API --------- -To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#module-qlib.data.dataset.__init__>`_. - +To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#dataset>`_. Cache diff --git a/docs/component/recorder.rst b/docs/component/recorder.rst index 3882161bc..cc425fa8e 100644 --- a/docs/component/recorder.rst +++ b/docs/component/recorder.rst @@ -34,6 +34,7 @@ Here is a general view of the structure of the system: - Recorder 2 - ... - ... + This experiment management system defines a set of interface and provided a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link `_). 
If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, pleaes refer to the related documents `here `_. diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml new file mode 100644 index 000000000..335dc2093 --- /dev/null +++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_configurable_dataset.yaml @@ -0,0 +1,81 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + instruments: *market + data_loader: + class: QlibDataLoader + kwargs: + config: + feature: + - ["Resi($close, 15)/$close", "Std(Abs($close/Ref($close, 1)-1)*$volume, 5)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, 5)+1e-12)", "Rsquare($close, 5)", "($high-$low)/$open", "Rsquare($close, 10)", "Corr($close, Log($volume+1), 5)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)", "Corr($close, Log($volume+1), 10)", "Rsquare($close, 20)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)", "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)", "Corr($close, Log($volume+1), 20)", "(Less($open, $close)-$low)/$open"] + - ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"] + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + freq: day + + learn_processors: + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: label +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + 
benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.data.dataset.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/hyperparameter/LightGBM/Readme.md b/examples/hyperparameter/LightGBM/Readme.md new file mode 100644 index 000000000..320e13828 --- /dev/null +++ b/examples/hyperparameter/LightGBM/Readme.md @@ -0,0 +1,23 @@ +# LightGBM hyperparameter + +## Alpha158 +First terminal +``` +optuna create-study --study LGBM_158 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_158.py +``` + +## Alpha360 +First terminal +``` +optuna create-study --study LGBM_360 --storage sqlite:///db.sqlite3 +optuna-dashboard --port 5000 --host 0.0.0.0 sqlite:///db.sqlite3 +``` +Second terminal +``` +python hyperparameter_360.py +``` diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py new file mode 100644 index 000000000..5e4887a14 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -0,0 +1,76 @@ +import qlib +from 
qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), + "max_depth": 10, + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), + }, 
+ }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + model.fit(dataset, evals_result=evals_result) + return min(evals_result["valid"]) + + +study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py new file mode 100644 index 000000000..8b498e912 --- /dev/null +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -0,0 +1,76 @@ +import qlib +from qlib.config import REG_CN +from qlib.utils import exists_qlib_data, init_instance_by_config +import optuna + +provider_uri = "~/.qlib/qlib_data/cn_data" +if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + sys.path.append(str(scripts_dir)) + from get_data import GetData + + GetData().qlib_data(target_dir=provider_uri, region="cn") +qlib.init(provider_uri=provider_uri, region="cn") + +market = "csi300" +benchmark = "SH000300" + +data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, +} +dataset_task = { + "dataset": { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha360", + "module_path": "qlib.contrib.data.handler", + "kwargs": data_handler_config, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + }, +} +dataset = init_instance_by_config(dataset_task["dataset"]) + + +def objective(trial): + task = { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1), + "learning_rate": trial.suggest_uniform("learning_rate", 0, 1), + "subsample": 
trial.suggest_uniform("subsample", 0, 1), + "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 1e4), + "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 1e4), + "max_depth": 10, + "num_leaves": trial.suggest_int("num_leaves", 1, 1024), + "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0), + "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 7), + "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), + }, + }, + } + + evals_result = dict() + model = init_instance_by_config(task["model"]) + model.fit(dataset, evals_result=evals_result) + return min(evals_result["valid"]) + + +study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") +study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/requirements.txt b/examples/hyperparameter/LightGBM/requirements.txt new file mode 100644 index 000000000..c8b16cefe --- /dev/null +++ b/examples/hyperparameter/LightGBM/requirements.txt @@ -0,0 +1,5 @@ +pandas==1.1.2 +numpy==1.17.4 +lightgbm==3.1.0 +optuna==2.7.0 +optuna-dashboard==0.4.1 diff --git a/qlib/contrib/backtest/position.py b/qlib/contrib/backtest/position.py index 97abc2a56..09313f933 100644 --- a/qlib/contrib/backtest/position.py +++ b/qlib/contrib/backtest/position.py @@ -1,10 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
- -import pandas as pd import copy import pathlib +import pandas as pd +import numpy as np from .order import Order """ diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py index 63b49d78b..c6338832a 100644 --- a/qlib/data/dataset/handler.py +++ b/qlib/data/dataset/handler.py @@ -61,7 +61,7 @@ class DataHandler(Serializable): instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, init_data=True, fetch_orig=True, ): @@ -74,7 +74,7 @@ class DataHandler(Serializable): start_time of the original data. end_time : end_time of the original data. - data_loader : Tuple[dict, str, DataLoader] + data_loader : Union[dict, str, DataLoader] data loader to load the data. init_data : initialize the original data in the constructor. @@ -305,7 +305,7 @@ class DataHandlerLP(DataHandler): instruments=None, start_time=None, end_time=None, - data_loader: Tuple[dict, str, DataLoader] = None, + data_loader: Union[dict, str, DataLoader] = None, infer_processors=[], learn_processors=[], process_type=PTYPE_A, diff --git a/qlib/log.py b/qlib/log.py index a0e4f7986..379544392 100644 --- a/qlib/log.py +++ b/qlib/log.py @@ -165,8 +165,81 @@ class LogFilter(logging.Filter): return allow -def set_global_logger_level(level: int): +def set_global_logger_level(level: int, return_orig_handler_level: bool = False): + """set qlib.xxx logger handlers level + + Parameters + ---------- + level: int + logger level + + return_orig_handler_level: bool + return origin handler level map + + Examples + --------- + + .. code-block:: python + + import qlib + import logging + from qlib.log import get_module_logger, set_global_logger_level + qlib.init() + + tmp_logger_01 = get_module_logger("tmp_logger_01", level=logging.INFO) + tmp_logger_01.info("1. 
tmp_logger_01 info show") + + global_level = logging.WARNING + 1 + set_global_logger_level(global_level) + tmp_logger_02 = get_module_logger("tmp_logger_02", level=logging.INFO) + tmp_logger_02.log(msg="2. tmp_logger_02 log show", level=global_level) + + tmp_logger_01.info("3. tmp_logger_01 info do not show") + + """ + _handler_level_map = {} qlib_logger = logging.root.manager.loggerDict.get("qlib", None) if qlib_logger is not None: for _handler in qlib_logger.handlers: + _handler_level_map[_handler] = _handler.level _handler.level = level + return _handler_level_map if return_orig_handler_level else None + + +@contextmanager +def set_global_logger_level_cm(level: int): + """set qlib.xxx logger handlers level to use contextmanager + + Parameters + ---------- + level: int + logger level + + Examples + --------- + + .. code-block:: python + + import qlib + import logging + from qlib.log import get_module_logger, set_global_logger_level_cm + qlib.init() + + tmp_logger_01 = get_module_logger("tmp_logger_01", level=logging.INFO) + tmp_logger_01.info("1. tmp_logger_01 info show") + + global_level = logging.WARNING + 1 + with set_global_logger_level_cm(global_level): + tmp_logger_02 = get_module_logger("tmp_logger_02", level=logging.INFO) + tmp_logger_02.log(msg="2. tmp_logger_02 log show", level=global_level) + tmp_logger_01.info("3. tmp_logger_01 info do not show") + + tmp_logger_01.info("4. tmp_logger_01 info show") + + """ + _handler_level_map = set_global_logger_level(level, return_orig_handler_level=True) + try: + yield + finally: + for _handler, _level in _handler_level_map.items(): + _handler.level = _level