online_serving V3

2026-07-05 12:00:58 +08:00 · 2021-03-18 09:30:01 +00:00
parent d33041dc24
commit 8abdd63869
9 changed files with 333 additions and 273 deletions
--- a/examples/taskmanager/task_manager_rolling.py
+++ b/examples/taskmanager/task_manager_rolling.py
@@ -3,6 +3,11 @@ from qlib.config import REG_CN
 from qlib.workflow.task.gen import RollingGen, task_generator
 from qlib.workflow.task.manage import TaskManager
 from qlib.config import C
+from qlib.workflow.task.manage import run_task
+from qlib.workflow.task.collect import RollingCollector
+from qlib.model.trainer import task_train
+from qlib.workflow import R
+from pprint import pprint

 data_handler_config = {
    "start_time": "2008-01-01",
@@ -60,51 +65,78 @@ task_xgboost_config = {
    "record": record_config,
 }

-provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-qlib.init(provider_uri=provider_uri, region=REG_CN)
+# Reset everything to its initial state; be sure to save any important data first
+def reset():
+    print("========== reset ==========")
+    TaskManager(task_pool=task_pool).remove()

-C["mongo"] = {
-    "task_url": "mongodb://localhost:27017/",  # maybe you need to change it to your url
-    "task_db_name": "rolling_db",
-}
+    # exp = R.get_exp(experiment_name=exp_name)

-exp_name = "rolling_exp"  # experiment name, will be used as the experiment in MLflow
-task_pool = "rolling_task"  # task pool name, will be used as the document in MongoDB
-
-tasks = task_generator(
-    task_xgboost_config,  # default task name
-    RollingGen(step=550, rtype=RollingGen.ROLL_SD),  # generate different date segment
-    task_lgb=task_lgb_config,  # use "task_lgb" as the task name
-)
-
-# Uncomment next two lines to see the generated tasks
-# from pprint import pprint
-# pprint(tasks)
-
-tm = TaskManager(task_pool=task_pool)
-tm.create_task(tasks)  # all tasks will be saved to MongoDB
-
-from qlib.workflow.task.manage import run_task
-from qlib.workflow.task.collect import TaskCollector
-from qlib.model.trainer import task_train
-
-run_task(task_train, task_pool, experiment_name=exp_name)  # all tasks will be trained using "task_train" method
+    # for rid in R.list_recorders():
+    #     exp.delete_recorder(rid)


-def get_task_key(task_config):
-    task_key = task_config["task_key"]
-    rolling_end_timestamp = task_config["dataset"]["kwargs"]["segments"]["test"][1]
-    return task_key, rolling_end_timestamp.strftime("%Y-%m-%d")
+# This part corresponds to "Task Generating" in the document
+def task_generating():
+
+    print("========== task_generating ==========")
+
+    tasks = task_generator(
+        tasks=[task_xgboost_config, task_lgb_config],
+        generators=RollingGen(step=550, rtype=RollingGen.ROLL_SD),  # generate different date segment
+    )
+
+    pprint(tasks)
+
+    return tasks


-def my_filter(task_config):
-    # only choose the results of "task_lgb" and test in 2019 from all tasks
-    task_key, rolling_end = get_task_key(task_config)
-    if task_key == "task_lgb" and rolling_end.startswith("2019"):
-        return True
-    return False
+# This part corresponds to "Task Storing" in the document
+def task_storing(tasks):
+    print("========== task_storing ==========")
+    tm = TaskManager(task_pool=task_pool)
+    tm.create_task(tasks)  # all tasks will be saved to MongoDB


-# name tasks by "get_task_key" and filter tasks by "my_filter"
-pred_rolling = TaskCollector.collect_predictions(exp_name, get_task_key, my_filter)
-pred_rolling
+# This part corresponds to "Task Running" in the document
+def task_running():
+    print("========== task_running ==========")
+    run_task(task_train, task_pool, experiment_name=exp_name)  # all tasks will be trained using "task_train" method
+
+
+# This part corresponds to "Task Collecting" in the document
+def task_collecting():
+    print("========== task_collecting ==========")
+
+    def get_task_key(task_config):
+        return task_config["model"]["class"]
+
+    def my_filter(recorder):
+        # only choose the results of "LGBModel"
+        task_key = get_task_key(rolling_collector.get_task(recorder))
+        if task_key == "LGBModel":
+            return True
+        return False
+
+    rolling_collector = RollingCollector(exp_name)
+    # group tasks by "get_task_key" and filter tasks by "my_filter"
+    pred_rolling = rolling_collector.collect_rolling_predictions(get_task_key, my_filter)
+    print(pred_rolling)
+
+
+if __name__ == "__main__":
+
+    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+    mongo_conf = {
+        "task_url": "mongodb://10.0.0.4:27017/",  # maybe you need to change it to your url
+        "task_db_name": "rolling_db",
+    }
+    exp_name = "rolling_exp"  # experiment name, will be used as the experiment in MLflow
+    task_pool = "rolling_task"  # task pool name, will be used as the document in MongoDB
+    qlib.init(provider_uri=provider_uri, region=REG_CN, mongo=mongo_conf)
+
+    reset()
+    tasks = task_generating()
+    task_storing(tasks)
+    task_running()
+    task_collecting()
--- a/examples/taskmanager/task_manager_rolling_with_updating.py
+++ b/examples/taskmanager/task_manager_rolling_with_updating.py
@@ -3,15 +3,14 @@ import fire
 import mlflow
 from qlib.config import C
 from qlib.workflow import R
+from pprint import pprint
 from qlib.config import REG_CN
 from qlib.model.trainer import task_train
 from qlib.workflow.task.manage import run_task
 from qlib.workflow.task.manage import TaskManager
-from qlib.workflow.task.utils import TimeAdjuster
-from qlib.workflow.task.update import ModelUpdater
-from qlib.workflow.task.collect import TaskCollector
+from qlib.workflow.task.collect import RollingCollector
 from qlib.workflow.task.gen import RollingGen, task_generator
-
+from qlib.workflow.task.online import RollingOnlineManager

 data_handler_config = {
    "start_time": "2013-01-01",
@@ -33,7 +32,7 @@ dataset_config = {
        "segments": {
            "train": ("2013-01-01", "2014-12-31"),
            "valid": ("2015-01-01", "2015-12-31"),
-            "test": ("2016-01-01", "2017-01-01"),
+            "test": ("2016-01-01", "2020-07-10"),
        },
    },
 }
@@ -69,16 +68,25 @@ task_xgboost_config = {
    "record": record_config,
 }

+
+def print_online_model():
+    print("Current 'online' model:")
+    for online in rolling_online_manager.list_online_model().values():
+        print(online.info["id"])
+    print("Current 'next online' model:")
+    for online in rolling_online_manager.list_next_online_model().values():
+        print(online.info["id"])
+
+
 # This part corresponds to "Task Generating" in the document
-def task_generating(**kwargs):
-    print("========================================= task_generating =========================================")
+def task_generating():

-    rolling_generator = RollingGen(step=rolling_step, rtype=RollingGen.ROLL_EX)
+    print("========== task_generating ==========")

-    tasks = task_generator(rolling_generator, **kwargs)
-
-    # See the generated tasks in a easy way
-    from pprint import pprint
+    tasks = task_generator(
+        tasks=[task_xgboost_config, task_lgb_config],
+        generators=rolling_gen,  # generate different date segment
+    )

    pprint(tasks)

@@ -87,49 +95,45 @@ def task_generating(**kwargs):

 # This part corresponds to "Task Storing" in the document
 def task_storing(tasks):
-    print("========================================= task_storing =========================================")
+    print("========== task_storing ==========")
    tm = TaskManager(task_pool=task_pool)
    tm.create_task(tasks)  # all tasks will be saved to MongoDB


 # This part corresponds to "Task Running" in the document
 def task_running():
-    print("========================================= task_running =========================================")
+    print("========== task_running ==========")
    run_task(task_train, task_pool, experiment_name=exp_name)  # all tasks will be trained using "task_train" method


 # This part corresponds to "Task Collecting" in the document
 def task_collecting():
-    print("========================================= task_collecting =========================================")
+    print("========== task_collecting ==========")

    def get_task_key(task_config):
-        task_key = task_config["task_key"]
-        rolling_end_timestamp = task_config["dataset"]["kwargs"]["segments"]["test"][1]
-        if rolling_end_timestamp == None:
-            rolling_end_timestamp = TimeAdjuster().last_date()
-        return task_key, rolling_end_timestamp.strftime("%Y-%m-%d")
+        return task_config["model"]["class"]

-    def lgb_filter(task_config):
-        # only choose the results of "task_lgb"
-        task_key, rolling_end = get_task_key(task_config)
-        if task_key == "task_lgb":
+    def my_filter(recorder):
+        # only choose the results of "LGBModel"
+        task_key = get_task_key(rolling_collector.get_task(recorder))
+        if task_key == "LGBModel":
            return True
        return False

-    task_collector = TaskCollector(exp_name)
-    pred_rolling = task_collector.collect_predictions(
-        get_task_key, lgb_filter
-    )  # name tasks by "get_task_key" and filter tasks by "my_filter"
+    rolling_collector = RollingCollector(exp_name)
+    # group tasks by "get_task_key" and filter tasks by "my_filter"
+    pred_rolling = rolling_collector.collect_rolling_predictions(get_task_key, my_filter)
    print(pred_rolling)


 # Reset all things to the first status, be careful to save important data
 def reset(force_end=False):
-    print("========================================= reset =========================================")
-    TaskManager(task_pool=task_pool).remove()
-
+    print("========== reset ==========")
+    task_manager.remove()
+    for error in task_manager.query():
+        assert False
    exp = R.get_exp(experiment_name=exp_name)
-    recs = TaskCollector(exp_name).list_recorders(only_finished=True)
+    recs = exp.list_recorders()

    for rid in recs:
        exp.delete_recorder(rid)
@@ -141,82 +145,60 @@ def reset(force_end=False):
        pass


-def set_online_model_to_latest():
-    print(
-        "========================================= set_online_model_to_latest ========================================="
-    )
-    model_updater = ModelUpdater(experiment_name=exp_name)
-    latest_records, latest_test = model_updater.collect_latest_records()
-    model_updater.reset_online_model(latest_records.values())
-
-
 # Run this firstly to see the workflow in Task Management
 def first_run():
-    print("========================================= first_run =========================================")
+    print("========== first_run ==========")
    reset(force_end=True)

-    # use "task_lgb" and "task_xgboost" as the task name
-    tasks = task_generating(**{"task_xgboost": task_xgboost_config, "task_lgb": task_lgb_config})
+    tasks = task_generating()
    task_storing(tasks)
    task_running()
    task_collecting()
-    set_online_model_to_latest()
+
+    rolling_online_manager.set_latest_model_to_next_online()
+    rolling_online_manager.reset_online_model()


 # Update the predictions of online model
 def update_predictions():
-    print("========================================= update_predictions =========================================")
-    model_updater = ModelUpdater(experiment_name=exp_name)
-    model_updater.update_online_pred()
+    print("========== update_predictions ==========")
+    rolling_online_manager.update_online_pred()
+    task_collecting()
+    # If there are any "next online" models, bring them online; otherwise keep using the current online models.
+    print_online_model()
+    rolling_online_manager.reset_online_model()
+    print_online_model()


 # Update the models using the latest date and set them to online model
 def update_model():
-    print("========================================= update_model =========================================")
-    # get the latest recorders
-    model_updater = ModelUpdater(experiment_name=exp_name)
-    latest_records, latest_test = model_updater.collect_latest_records()
-    # date adjustment based on trade day of Calendar in Qlib
-    time_adjuster = TimeAdjuster()
-    calendar_latest = time_adjuster.last_date()
-    print("The latest date is ", calendar_latest)
-    if time_adjuster.cal_interval(calendar_latest, latest_test[0]) > rolling_step:
-        print("Need update models!")
-        tasks = {}
-        for rid, rec in latest_records.items():
-            old_task = rec.task
-            test_begin = old_task["dataset"]["kwargs"]["segments"]["test"][0]
-            # modify the test segment to generate new tasks
-            old_task["dataset"]["kwargs"]["segments"]["test"] = (test_begin, calendar_latest)
-            tasks[old_task["task_key"]] = old_task
+    print("========== update_model ==========")
+    rolling_online_manager.prepare_new_models()
+    print_online_model()
+    rolling_online_manager.set_latest_model_to_next_online()
+    print_online_model()

-        # retrain the latest model
-        new_tasks = task_generating(**tasks)
-        task_storing(new_tasks)
-        task_running()
-        task_collecting()
-        latest_records, _ = model_updater.collect_latest_records()

-    # set the latest model to online model
-    model_updater.reset_online_model(latest_records.values())
+def after_day():
+    rolling_online_manager.prepare_signals()
+    update_model()
+    update_predictions()


 # Run whole workflow completely
 def whole_workflow():
-    print("========================================= whole_workflow =========================================")
+    print("========== whole_workflow ==========")
    # run this at the first time
    first_run()
-    # run this every day
-    update_predictions()
-    # run this every "rolling_steps" day
-    update_model()
+    # run this every day after trading
+    after_day()


 if __name__ == "__main__":
    ####### to train the first version's models, use the command below
    # python task_manager_rolling_with_updating.py first_run

-    ####### to update the models using the latest date and set them to online model, use the command below
+    ####### to update the models using the latest date, use the command below
    # python task_manager_rolling_with_updating.py update_model

    ####### to update the predictions to the latest date, use the command below
@@ -231,8 +213,8 @@ if __name__ == "__main__":
    qlib.init(provider_uri=provider_uri, region=REG_CN)

    C["mongo"] = {
-        "task_url": "mongodb://localhost:27017/",  # your MongoDB url
-        "task_db_name": "rolling_db",  # database name
+        "task_url": "mongodb://10.0.0.4:27017/",  # your MongoDB url
+        "task_db_name": "online",  # database name
    }

    exp_name = "rolling_exp"  # experiment name, will be used as the experiment in MLflow
@@ -240,5 +222,9 @@ if __name__ == "__main__":
    rolling_step = 550

    ##########################################################################################
-
+    rolling_gen = RollingGen(step=550, rtype=RollingGen.ROLL_SD)
+    rolling_online_manager = RollingOnlineManager(
+        experiment_name=exp_name, rolling_gen=rolling_gen, task_pool=task_pool
+    )
+    task_manager = TaskManager(task_pool=task_pool)
    fire.Fire()