Merge pull request #290 from you-n-g/online_srv

init version of online serving and rolling
2026-07-22 03:37:34 +08:00 · 2021-05-17 17:35:29 +08:00
parent 142a9dca3c 8c3a08b18d
commit d08146c30f
39 changed files with 3903 additions and 125 deletions
--- a/examples/model_rolling/task_manager_rolling.py
+++ b/examples/model_rolling/task_manager_rolling.py
@@ -0,0 +1,159 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how a TrainerRM works based on TaskManager with rolling tasks.
+After training, how to collect the rolling results will be shown in task_collecting.
+"""
+
+from pprint import pprint
+
+import fire
+import qlib
+from qlib.config import REG_CN
+from qlib.workflow import R
+from qlib.workflow.task.gen import RollingGen, task_generator
+from qlib.workflow.task.manage import TaskManager
+from qlib.workflow.task.collect import RecorderCollector
+from qlib.model.ens.group import RollingGroup
+from qlib.model.trainer import TrainerRM
+
+
+data_handler_config = {
+    "start_time": "2008-01-01",
+    "end_time": "2020-08-01",
+    "fit_start_time": "2008-01-01",
+    "fit_end_time": "2014-12-31",
+    "instruments": "csi100",
+}
+
+dataset_config = {
+    "class": "DatasetH",
+    "module_path": "qlib.data.dataset",
+    "kwargs": {
+        "handler": {
+            "class": "Alpha158",
+            "module_path": "qlib.contrib.data.handler",
+            "kwargs": data_handler_config,
+        },
+        "segments": {
+            "train": ("2008-01-01", "2014-12-31"),
+            "valid": ("2015-01-01", "2016-12-31"),
+            "test": ("2017-01-01", "2020-08-01"),
+        },
+    },
+}
+
+record_config = [
+    {
+        "class": "SignalRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+    {
+        "class": "SigAnaRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+]
+
+# use lgb
+task_lgb_config = {
+    "model": {
+        "class": "LGBModel",
+        "module_path": "qlib.contrib.model.gbdt",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+# use xgboost
+task_xgboost_config = {
+    "model": {
+        "class": "XGBModel",
+        "module_path": "qlib.contrib.model.xgboost",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+
+class RollingTaskExample:
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region=REG_CN,
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        experiment_name="rolling_exp",
+        task_pool="rolling_task",
+        task_config=[task_xgboost_config, task_lgb_config],
+        rolling_step=550,
+        rolling_type=RollingGen.ROLL_SD,
+    ):
+        # TaskManager config
+        mongo_conf = {
+            "task_url": task_url,
+            "task_db_name": task_db_name,
+        }
+        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
+        self.experiment_name = experiment_name
+        self.task_pool = task_pool
+        self.task_config = task_config
+        self.rolling_gen = RollingGen(step=rolling_step, rtype=rolling_type)
+
+    # Reset all things to the first status, be careful to save important data
+    def reset(self):
+        print("========== reset ==========")
+        TaskManager(task_pool=self.task_pool).remove()
+        exp = R.get_exp(experiment_name=self.experiment_name)
+        for rid in exp.list_recorders():
+            exp.delete_recorder(rid)
+
+    def task_generating(self):
+        print("========== task_generating ==========")
+        tasks = task_generator(
+            tasks=self.task_config,
+            generators=self.rolling_gen,  # generate different date segments
+        )
+        pprint(tasks)
+        return tasks
+
+    def task_training(self, tasks):
+        print("========== task_training ==========")
+        trainer = TrainerRM(self.experiment_name, self.task_pool)
+        trainer.train(tasks)
+
+    def task_collecting(self):
+        print("========== task_collecting ==========")
+
+        def rec_key(recorder):
+            task_config = recorder.load_object("task")
+            model_key = task_config["model"]["class"]
+            rolling_key = task_config["dataset"]["kwargs"]["segments"]["test"]
+            return model_key, rolling_key
+
+        def my_filter(recorder):
+            # only choose the results of "LGBModel"
+            model_key, rolling_key = rec_key(recorder)
+            if model_key == "LGBModel":
+                return True
+            return False
+
+        collector = RecorderCollector(
+            experiment=self.experiment_name,
+            process_list=RollingGroup(),
+            rec_key_func=rec_key,
+            rec_filter_func=my_filter,
+        )
+        print(collector())
+
+    def main(self):
+        self.reset()
+        tasks = self.task_generating()
+        self.task_training(tasks)
+        self.task_collecting()
+
+
+if __name__ == "__main__":
+    ## to see the whole process with your own parameters, use the command below
+    # python task_manager_rolling.py main --experiment_name="your_exp_name"
+    fire.Fire(RollingTaskExample)
--- a/examples/online_srv/online_management_simulate.py
+++ b/examples/online_srv/online_management_simulate.py
@@ -0,0 +1,146 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example is about how can simulate the OnlineManager based on rolling tasks. 
+"""
+
+import fire
+import qlib
+from qlib.model.trainer import DelayTrainerR, DelayTrainerRM, TrainerR, TrainerRM
+from qlib.workflow import R
+from qlib.workflow.online.manager import OnlineManager
+from qlib.workflow.online.strategy import RollingStrategy
+from qlib.workflow.task.gen import RollingGen
+from qlib.workflow.task.manage import TaskManager
+
+
+data_handler_config = {
+    "start_time": "2018-01-01",
+    "end_time": "2018-10-31",
+    "fit_start_time": "2018-01-01",
+    "fit_end_time": "2018-03-31",
+    "instruments": "csi100",
+}
+
+dataset_config = {
+    "class": "DatasetH",
+    "module_path": "qlib.data.dataset",
+    "kwargs": {
+        "handler": {
+            "class": "Alpha158",
+            "module_path": "qlib.contrib.data.handler",
+            "kwargs": data_handler_config,
+        },
+        "segments": {
+            "train": ("2018-01-01", "2018-03-31"),
+            "valid": ("2018-04-01", "2018-05-31"),
+            "test": ("2018-06-01", "2018-09-10"),
+        },
+    },
+}
+
+record_config = [
+    {
+        "class": "SignalRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+    {
+        "class": "SigAnaRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+]
+
+# use lgb model
+task_lgb_config = {
+    "model": {
+        "class": "LGBModel",
+        "module_path": "qlib.contrib.model.gbdt",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+# use xgboost model
+task_xgboost_config = {
+    "model": {
+        "class": "XGBModel",
+        "module_path": "qlib.contrib.model.xgboost",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+
+class OnlineSimulationExample:
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region="cn",
+        exp_name="rolling_exp",
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        task_pool="rolling_task",
+        rolling_step=80,
+        start_time="2018-09-10",
+        end_time="2018-10-31",
+        tasks=[task_xgboost_config, task_lgb_config],
+    ):
+        """
+        Init OnlineManagerExample.
+
+        Args:
+            provider_uri (str, optional): the provider uri. Defaults to "~/.qlib/qlib_data/cn_data".
+            region (str, optional): the stock region. Defaults to "cn".
+            exp_name (str, optional): the experiment name. Defaults to "rolling_exp".
+            task_url (str, optional): your MongoDB url. Defaults to "mongodb://10.0.0.4:27017/".
+            task_db_name (str, optional): database name. Defaults to "rolling_db".
+            task_pool (str, optional): the task pool name (a task pool is a collection in MongoDB). Defaults to "rolling_task".
+            rolling_step (int, optional): the step for rolling. Defaults to 80.
+            start_time (str, optional): the start time of simulating. Defaults to "2018-09-10".
+            end_time (str, optional): the end time of simulating. Defaults to "2018-10-31".
+            tasks (dict or list[dict]): a set of the task config waiting for rolling and training
+        """
+        self.exp_name = exp_name
+        self.task_pool = task_pool
+        self.start_time = start_time
+        self.end_time = end_time
+        mongo_conf = {
+            "task_url": task_url,
+            "task_db_name": task_db_name,
+        }
+        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
+        self.rolling_gen = RollingGen(
+            step=rolling_step, rtype=RollingGen.ROLL_SD, ds_extra_mod_func=None
+        )  # The rolling tasks generator, ds_extra_mod_func is None because we just need to simulate to 2018-10-31 and needn't change the handler end time.
+        self.trainer = DelayTrainerRM(self.exp_name, self.task_pool)  # Also can be TrainerR, TrainerRM, DelayTrainerR
+        self.rolling_online_manager = OnlineManager(
+            RollingStrategy(exp_name, task_template=tasks, rolling_gen=self.rolling_gen),
+            trainer=self.trainer,
+            begin_time=self.start_time,
+        )
+        self.tasks = tasks
+
+    # Reset all things to the first status, be careful to save important data
+    def reset(self):
+        TaskManager(self.task_pool).remove()
+        exp = R.get_exp(experiment_name=self.exp_name)
+        for rid in exp.list_recorders():
+            exp.delete_recorder(rid)
+
+    # Run this to run all workflow automatically
+    def main(self):
+        print("========== reset ==========")
+        self.reset()
+        print("========== simulate ==========")
+        self.rolling_online_manager.simulate(end_time=self.end_time)
+        print("========== collect results ==========")
+        print(self.rolling_online_manager.get_collector()())
+        print("========== signals ==========")
+        print(self.rolling_online_manager.get_signals())
+
+
+if __name__ == "__main__":
+    ## to run all workflow automatically with your own parameters, use the command below
+    # python online_management_simulate.py main --experiment_name="your_exp_name" --rolling_step=60
+    fire.Fire(OnlineSimulationExample)
--- a/examples/online_srv/rolling_online_management.py
+++ b/examples/online_srv/rolling_online_management.py
@@ -0,0 +1,181 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how OnlineManager works with rolling tasks.
+There are four parts including first train, routine 1, add strategy and routine 2.
+Firstly, the OnlineManager will finish the first training and set trained models to `online` models.
+Next, the OnlineManager will finish a routine process, including update online prediction -> prepare tasks -> prepare new models -> prepare signals
+Then, we will add some new strategies to the OnlineManager. This will finish first training of new strategies.
+Finally, the OnlineManager will finish second routine and update all strategies.
+"""
+
+import os
+import fire
+import qlib
+from qlib.workflow import R
+from qlib.workflow.online.strategy import RollingStrategy
+from qlib.workflow.task.gen import RollingGen
+from qlib.workflow.online.manager import OnlineManager
+
+data_handler_config = {
+    "start_time": "2013-01-01",
+    "end_time": "2020-09-25",
+    "fit_start_time": "2013-01-01",
+    "fit_end_time": "2014-12-31",
+    "instruments": "csi100",
+}
+
+dataset_config = {
+    "class": "DatasetH",
+    "module_path": "qlib.data.dataset",
+    "kwargs": {
+        "handler": {
+            "class": "Alpha158",
+            "module_path": "qlib.contrib.data.handler",
+            "kwargs": data_handler_config,
+        },
+        "segments": {
+            "train": ("2013-01-01", "2014-12-31"),
+            "valid": ("2015-01-01", "2015-12-31"),
+            "test": ("2016-01-01", "2020-07-10"),
+        },
+    },
+}
+
+record_config = [
+    {
+        "class": "SignalRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+    {
+        "class": "SigAnaRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+]
+
+# use lgb model
+task_lgb_config = {
+    "model": {
+        "class": "LGBModel",
+        "module_path": "qlib.contrib.model.gbdt",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+# use xgboost model
+task_xgboost_config = {
+    "model": {
+        "class": "XGBModel",
+        "module_path": "qlib.contrib.model.xgboost",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+
+class RollingOnlineExample:
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region="cn",
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        rolling_step=550,
+        tasks=[task_xgboost_config],
+        add_tasks=[task_lgb_config],
+    ):
+        mongo_conf = {
+            "task_url": task_url,  # your MongoDB url
+            "task_db_name": task_db_name,  # database name
+        }
+        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
+        self.tasks = tasks
+        self.add_tasks = add_tasks
+        self.rolling_step = rolling_step
+        strategies = []
+        for task in tasks:
+            name_id = task["model"]["class"]  # NOTE: Assumption: The model class can specify only one strategy
+            strategies.append(
+                RollingStrategy(
+                    name_id,
+                    task,
+                    RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD),
+                )
+            )
+
+        self.rolling_online_manager = OnlineManager(strategies)
+
+    _ROLLING_MANAGER_PATH = (
+        ".RollingOnlineExample"  # the OnlineManager will dump to this file, for it can be loaded when calling routine.
+    )
+
+    # Reset all things to the first status, be careful to save important data
+    def reset(self):
+        for task in self.tasks + self.add_tasks:
+            name_id = task["model"]["class"]
+            exp = R.get_exp(experiment_name=name_id)
+            for rid in exp.list_recorders():
+                exp.delete_recorder(rid)
+
+        if os.path.exists(self._ROLLING_MANAGER_PATH):
+            os.remove(self._ROLLING_MANAGER_PATH)
+
+    def first_run(self):
+        print("========== reset ==========")
+        self.reset()
+        print("========== first_run ==========")
+        self.rolling_online_manager.first_train()
+        print("========== collect results ==========")
+        print(self.rolling_online_manager.get_collector()())
+        print("========== dump ==========")
+        self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
+
+    def routine(self):
+        print("========== load ==========")
+        self.rolling_online_manager = OnlineManager.load(self._ROLLING_MANAGER_PATH)
+        print("========== routine ==========")
+        self.rolling_online_manager.routine()
+        print("========== collect results ==========")
+        print(self.rolling_online_manager.get_collector()())
+        print("========== signals ==========")
+        print(self.rolling_online_manager.get_signals())
+        print("========== dump ==========")
+        self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
+
+    def add_strategy(self):
+        print("========== load ==========")
+        self.rolling_online_manager = OnlineManager.load(self._ROLLING_MANAGER_PATH)
+        print("========== add strategy ==========")
+        strategies = []
+        for task in self.add_tasks:
+            name_id = task["model"]["class"]  # NOTE: Assumption: The model class can specify only one strategy
+            strategies.append(
+                RollingStrategy(
+                    name_id,
+                    task,
+                    RollingGen(step=self.rolling_step, rtype=RollingGen.ROLL_SD),
+                )
+            )
+        self.rolling_online_manager.add_strategy(strategies=strategies)
+        print("========== dump ==========")
+        self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
+
+    def main(self):
+        self.first_run()
+        self.routine()
+        self.add_strategy()
+        self.routine()
+
+
+if __name__ == "__main__":
+    ####### to train the first version's models, use the command below
+    # python rolling_online_management.py first_run
+
+    ####### to update the models and predictions after the trading time, use the command below
+    # python rolling_online_management.py routine
+
+    ####### to define your own parameters, use `--`
+    # python rolling_online_management.py first_run --exp_name='your_exp_name' --rolling_step=40
+    fire.Fire(RollingOnlineExample)
--- a/examples/online_srv/update_online_pred.py
+++ b/examples/online_srv/update_online_pred.py
@@ -0,0 +1,91 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+This example shows how OnlineTool works when we need update prediction.
+There are two parts including first_train and update_online_pred.
+Firstly, we will finish the training and set the trained models to the `online` models.
+Next, we will finish updating online predictions.
+"""
+import fire
+import qlib
+from qlib.config import REG_CN
+from qlib.model.trainer import task_train
+from qlib.workflow.online.utils import OnlineToolR
+
+data_handler_config = {
+    "start_time": "2008-01-01",
+    "end_time": "2020-08-01",
+    "fit_start_time": "2008-01-01",
+    "fit_end_time": "2014-12-31",
+    "instruments": "csi100",
+}
+
+task = {
+    "model": {
+        "class": "LGBModel",
+        "module_path": "qlib.contrib.model.gbdt",
+        "kwargs": {
+            "loss": "mse",
+            "colsample_bytree": 0.8879,
+            "learning_rate": 0.0421,
+            "subsample": 0.8789,
+            "lambda_l1": 205.6999,
+            "lambda_l2": 580.9768,
+            "max_depth": 8,
+            "num_leaves": 210,
+            "num_threads": 20,
+        },
+    },
+    "dataset": {
+        "class": "DatasetH",
+        "module_path": "qlib.data.dataset",
+        "kwargs": {
+            "handler": {
+                "class": "Alpha158",
+                "module_path": "qlib.contrib.data.handler",
+                "kwargs": data_handler_config,
+            },
+            "segments": {
+                "train": ("2008-01-01", "2014-12-31"),
+                "valid": ("2015-01-01", "2016-12-31"),
+                "test": ("2017-01-01", "2020-08-01"),
+            },
+        },
+    },
+    "record": {
+        "class": "SignalRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+}
+
+
+class UpdatePredExample:
+    def __init__(
+        self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task
+    ):
+        qlib.init(provider_uri=provider_uri, region=region)
+        self.experiment_name = experiment_name
+        self.online_tool = OnlineToolR(self.experiment_name)
+        self.task_config = task_config
+
+    def first_train(self):
+        rec = task_train(self.task_config, experiment_name=self.experiment_name)
+        self.online_tool.reset_online_tag(rec)  # set to online model
+
+    def update_online_pred(self):
+        self.online_tool.update_online_pred()
+
+    def main(self):
+        self.first_train()
+        self.update_online_pred()
+
+
+if __name__ == "__main__":
+    ## to train a model and set it to online model, use the command below
+    # python update_online_pred.py first_train
+    ## to update online predictions once a day, use the command below
+    # python update_online_pred.py update_online_pred
+    ## to see the whole process with your own parameters, use the command below
+    # python update_online_pred.py main --experiment_name="your_exp_name"
+    fire.Fire(UpdatePredExample)