mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-05 12:00:58 +08:00
online_serving V3
This commit is contained in:
@@ -3,6 +3,11 @@ from qlib.config import REG_CN
|
||||
from qlib.workflow.task.gen import RollingGen, task_generator
|
||||
from qlib.workflow.task.manage import TaskManager
|
||||
from qlib.config import C
|
||||
from qlib.workflow.task.manage import run_task
|
||||
from qlib.workflow.task.collect import RollingCollector
|
||||
from qlib.model.trainer import task_train
|
||||
from qlib.workflow import R
|
||||
from pprint import pprint
|
||||
|
||||
data_handler_config = {
|
||||
"start_time": "2008-01-01",
|
||||
@@ -60,51 +65,78 @@ task_xgboost_config = {
|
||||
"record": record_config,
|
||||
}
|
||||
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||
# Reset everything to its initial state; be careful to save important data first
|
||||
def reset():
    """Print a banner and call ``remove()`` on the task manager of ``task_pool``.

    ``task_pool`` is read from module scope (it is assigned in the
    ``__main__`` section of this script).
    """
    print("========== reset ==========")
    manager = TaskManager(task_pool=task_pool)
    manager.remove()
|
||||
|
||||
C["mongo"] = {
|
||||
"task_url": "mongodb://localhost:27017/", # maybe you need to change it to your url
|
||||
"task_db_name": "rolling_db",
|
||||
}
|
||||
# exp = R.get_exp(experiment_name=exp_name)
|
||||
|
||||
exp_name = "rolling_exp" # experiment name, will be used as the experiment in MLflow
|
||||
task_pool = "rolling_task" # task pool name, will be used as the document in MongoDB
|
||||
|
||||
tasks = task_generator(
|
||||
task_xgboost_config, # default task name
|
||||
RollingGen(step=550, rtype=RollingGen.ROLL_SD), # generate different date segment
|
||||
task_lgb=task_lgb_config, # use "task_lgb" as the task name
|
||||
)
|
||||
|
||||
# Uncomment next two lines to see the generated tasks
|
||||
# from pprint import pprint
|
||||
# pprint(tasks)
|
||||
|
||||
tm = TaskManager(task_pool=task_pool)
|
||||
tm.create_task(tasks) # all tasks will be saved to MongoDB
|
||||
|
||||
from qlib.workflow.task.manage import run_task
|
||||
from qlib.workflow.task.collect import TaskCollector
|
||||
from qlib.model.trainer import task_train
|
||||
|
||||
run_task(task_train, task_pool, experiment_name=exp_name) # all tasks will be trained using "task_train" method
|
||||
# for rid in R.list_recorders():
|
||||
# exp.delete_recorder(rid)
|
||||
|
||||
|
||||
def get_task_key(task_config):
    """Build the ``(task_key, rolling_end)`` pair that identifies a task.

    Parameters
    ----------
    task_config : dict
        Task configuration. Must contain ``"task_key"`` and
        ``["dataset"]["kwargs"]["segments"]["test"]``, whose second element
        is the end of the test segment.

    Returns
    -------
    tuple
        ``(task_key, rolling_end)`` where ``rolling_end`` is the end of the
        test segment formatted as ``"%Y-%m-%d"``.
    """
    task_key = task_config["task_key"]
    rolling_end = task_config["dataset"]["kwargs"]["segments"]["test"][1]
    # Segment bounds may be written as plain date strings in the task
    # configuration; those have no ``strftime`` and are returned unchanged
    # (assumed to already be in YYYY-MM-DD form -- confirm against callers).
    if isinstance(rolling_end, str):
        return task_key, rolling_end
    return task_key, rolling_end.strftime("%Y-%m-%d")
|
||||
# This part corresponds to "Task Generating" in the document
|
||||
def task_generating():
    """Generate the rolling tasks for the XGBoost and LightGBM task configs.

    Feeds both module-level task configs to ``task_generator`` together with
    a ``RollingGen(step=550, rtype=RollingGen.ROLL_SD)`` generator,
    pretty-prints the result and returns it.
    """
    print("========== task_generating ==========")

    base_tasks = [task_xgboost_config, task_lgb_config]
    rolling = RollingGen(step=550, rtype=RollingGen.ROLL_SD)  # generate different date segment
    generated = task_generator(tasks=base_tasks, generators=rolling)

    pprint(generated)

    return generated
|
||||
|
||||
|
||||
def my_filter(task_config):
    """Return True only for "task_lgb" tasks whose rolling end date starts with "2019"."""
    name, end_date = get_task_key(task_config)
    return name == "task_lgb" and end_date.startswith("2019")
|
||||
# This part corresponds to "Task Storing" in the document
|
||||
def task_storing(tasks):
    """Print a banner and pass ``tasks`` to ``create_task`` on the task manager of ``task_pool``."""
    print("========== task_storing ==========")
    # all tasks will be saved to MongoDB
    TaskManager(task_pool=task_pool).create_task(tasks)
|
||||
|
||||
|
||||
# name tasks by "get_task_key" and filter tasks by "my_filter"
|
||||
pred_rolling = TaskCollector.collect_predictions(exp_name, get_task_key, my_filter)
|
||||
pred_rolling
|
||||
# This part corresponds to "Task Running" in the document
|
||||
def task_running():
    """Print a banner and run the tasks in ``task_pool`` with ``task_train``.

    The experiment name is taken from the module-level ``exp_name``.
    """
    print("========== task_running ==========")
    # all tasks will be trained using "task_train" method
    run_task(task_train, task_pool, experiment_name=exp_name)
|
||||
|
||||
|
||||
# This part corresponds to "Task Collecting" in the document
|
||||
def task_collecting():
    """Collect and print the rolling predictions of the "LGBModel" tasks.

    Builds a ``RollingCollector`` for the module-level ``exp_name`` and calls
    ``collect_rolling_predictions`` with a key function (the model class of a
    task config) and a recorder filter that accepts only "LGBModel".
    """
    print("========== task_collecting ==========")

    def get_task_key(task_config):
        # Tasks are keyed by the class name of their model.
        return task_config["model"]["class"]

    def my_filter(recorder):
        # only choose the results of "LGBModel"
        recorder_task = rolling_collector.get_task(recorder)
        return get_task_key(recorder_task) == "LGBModel"

    rolling_collector = RollingCollector(exp_name)
    # group tasks by "get_task_key" and filter tasks by "my_filter"
    pred_rolling = rolling_collector.collect_rolling_predictions(get_task_key, my_filter)
    print(pred_rolling)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Settings read as module globals by the workflow functions above.
    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
    exp_name = "rolling_exp"  # experiment name, will be used as the experiment in MLflow
    task_pool = "rolling_task"  # task pool name, will be used as the document in MongoDB
    mongo_conf = dict(
        task_url="mongodb://10.0.0.4:27017/",  # maybe you need to change it to your url
        task_db_name="rolling_db",
    )
    qlib.init(provider_uri=provider_uri, region=REG_CN, mongo=mongo_conf)

    # Run the demo end to end: reset, generate, store, train, collect.
    reset()
    tasks = task_generating()
    task_storing(tasks)
    task_running()
    task_collecting()
|
||||
|
||||
@@ -3,15 +3,14 @@ import fire
|
||||
import mlflow
|
||||
from qlib.config import C
|
||||
from qlib.workflow import R
|
||||
from pprint import pprint
|
||||
from qlib.config import REG_CN
|
||||
from qlib.model.trainer import task_train
|
||||
from qlib.workflow.task.manage import run_task
|
||||
from qlib.workflow.task.manage import TaskManager
|
||||
from qlib.workflow.task.utils import TimeAdjuster
|
||||
from qlib.workflow.task.update import ModelUpdater
|
||||
from qlib.workflow.task.collect import TaskCollector
|
||||
from qlib.workflow.task.collect import RollingCollector
|
||||
from qlib.workflow.task.gen import RollingGen, task_generator
|
||||
|
||||
from qlib.workflow.task.online import RollingOnlineManager
|
||||
|
||||
data_handler_config = {
|
||||
"start_time": "2013-01-01",
|
||||
@@ -33,7 +32,7 @@ dataset_config = {
|
||||
"segments": {
|
||||
"train": ("2013-01-01", "2014-12-31"),
|
||||
"valid": ("2015-01-01", "2015-12-31"),
|
||||
"test": ("2016-01-01", "2017-01-01"),
|
||||
"test": ("2016-01-01", "2020-07-10"),
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -69,16 +68,25 @@ task_xgboost_config = {
|
||||
"record": record_config,
|
||||
}
|
||||
|
||||
|
||||
def print_online_model():
    """Print the ids of the current 'online' and 'next online' models.

    Both collections are read from the module-level ``rolling_online_manager``;
    for each entry the ``"id"`` item of its ``info`` is printed.
    """
    print("Current 'online' model:")
    online_models = rolling_online_manager.list_online_model()
    for rec in online_models.values():
        print(rec.info["id"])

    print("Current 'next online' model:")
    next_online_models = rolling_online_manager.list_next_online_model()
    for rec in next_online_models.values():
        print(rec.info["id"])
|
||||
|
||||
|
||||
# This part corresponds to "Task Generating" in the document
|
||||
def task_generating(**kwargs):
|
||||
print("========================================= task_generating =========================================")
|
||||
def task_generating():
|
||||
|
||||
rolling_generator = RollingGen(step=rolling_step, rtype=RollingGen.ROLL_EX)
|
||||
print("========== task_generating ==========")
|
||||
|
||||
tasks = task_generator(rolling_generator, **kwargs)
|
||||
|
||||
# See the generated tasks in an easy way
|
||||
from pprint import pprint
|
||||
tasks = task_generator(
|
||||
tasks=[task_xgboost_config, task_lgb_config],
|
||||
generators=rolling_gen, # generate different date segment
|
||||
)
|
||||
|
||||
pprint(tasks)
|
||||
|
||||
@@ -87,49 +95,45 @@ def task_generating(**kwargs):
|
||||
|
||||
# This part corresponds to "Task Storing" in the document
|
||||
def task_storing(tasks):
|
||||
print("========================================= task_storing =========================================")
|
||||
print("========== task_storing ==========")
|
||||
tm = TaskManager(task_pool=task_pool)
|
||||
tm.create_task(tasks) # all tasks will be saved to MongoDB
|
||||
|
||||
|
||||
# This part corresponds to "Task Running" in the document
|
||||
def task_running():
|
||||
print("========================================= task_running =========================================")
|
||||
print("========== task_running ==========")
|
||||
run_task(task_train, task_pool, experiment_name=exp_name) # all tasks will be trained using "task_train" method
|
||||
|
||||
|
||||
# This part corresponds to "Task Collecting" in the document
|
||||
def task_collecting():
|
||||
print("========================================= task_collecting =========================================")
|
||||
print("========== task_collecting ==========")
|
||||
|
||||
def get_task_key(task_config):
|
||||
task_key = task_config["task_key"]
|
||||
rolling_end_timestamp = task_config["dataset"]["kwargs"]["segments"]["test"][1]
|
||||
if rolling_end_timestamp == None:
|
||||
rolling_end_timestamp = TimeAdjuster().last_date()
|
||||
return task_key, rolling_end_timestamp.strftime("%Y-%m-%d")
|
||||
return task_config["model"]["class"]
|
||||
|
||||
def lgb_filter(task_config):
|
||||
# only choose the results of "task_lgb"
|
||||
task_key, rolling_end = get_task_key(task_config)
|
||||
if task_key == "task_lgb":
|
||||
def my_filter(recorder):
|
||||
# only choose the results of "LGBModel"
|
||||
task_key = get_task_key(rolling_collector.get_task(recorder))
|
||||
if task_key == "LGBModel":
|
||||
return True
|
||||
return False
|
||||
|
||||
task_collector = TaskCollector(exp_name)
|
||||
pred_rolling = task_collector.collect_predictions(
|
||||
get_task_key, lgb_filter
|
||||
) # name tasks by "get_task_key" and filter tasks by "my_filter"
|
||||
rolling_collector = RollingCollector(exp_name)
|
||||
# group tasks by "get_task_key" and filter tasks by "my_filter"
|
||||
pred_rolling = rolling_collector.collect_rolling_predictions(get_task_key, my_filter)
|
||||
print(pred_rolling)
|
||||
|
||||
|
||||
# Reset all things to the first status, be careful to save important data
|
||||
def reset(force_end=False):
|
||||
print("========================================= reset =========================================")
|
||||
TaskManager(task_pool=task_pool).remove()
|
||||
|
||||
print("========== reset ==========")
|
||||
task_manager.remove()
|
||||
for error in task_manager.query():
|
||||
assert False
|
||||
exp = R.get_exp(experiment_name=exp_name)
|
||||
recs = TaskCollector(exp_name).list_recorders(only_finished=True)
|
||||
recs = exp.list_recorders()
|
||||
|
||||
for rid in recs:
|
||||
exp.delete_recorder(rid)
|
||||
@@ -141,82 +145,60 @@ def reset(force_end=False):
|
||||
pass
|
||||
|
||||
|
||||
def set_online_model_to_latest():
    """Reset the online models to the latest records of the experiment.

    Collects the latest records through a ``ModelUpdater`` bound to the
    module-level ``exp_name`` and hands their values to ``reset_online_model``.
    """
    banner = "========================================= set_online_model_to_latest ========================================="
    print(banner)
    updater = ModelUpdater(experiment_name=exp_name)
    # The second element of the result (the latest test segment) is not needed here.
    latest_records, _ = updater.collect_latest_records()
    updater.reset_online_model(latest_records.values())
|
||||
|
||||
|
||||
# Run this first to see the workflow in Task Management
|
||||
def first_run():
|
||||
print("========================================= first_run =========================================")
|
||||
print("========== first_run ==========")
|
||||
reset(force_end=True)
|
||||
|
||||
# use "task_lgb" and "task_xgboost" as the task name
|
||||
tasks = task_generating(**{"task_xgboost": task_xgboost_config, "task_lgb": task_lgb_config})
|
||||
tasks = task_generating()
|
||||
task_storing(tasks)
|
||||
task_running()
|
||||
task_collecting()
|
||||
set_online_model_to_latest()
|
||||
|
||||
rolling_online_manager.set_latest_model_to_next_online()
|
||||
rolling_online_manager.reset_online_model()
|
||||
|
||||
|
||||
# Update the predictions of online model
|
||||
def update_predictions():
|
||||
print("========================================= update_predictions =========================================")
|
||||
model_updater = ModelUpdater(experiment_name=exp_name)
|
||||
model_updater.update_online_pred()
|
||||
print("========== update_predictions ==========")
|
||||
rolling_online_manager.update_online_pred()
|
||||
task_collecting()
|
||||
# If any models are marked as next online, bring them online; otherwise keep using the current online models.
|
||||
print_online_model()
|
||||
rolling_online_manager.reset_online_model()
|
||||
print_online_model()
|
||||
|
||||
|
||||
# Update the models using the latest date and set them to online model
|
||||
def update_model():
|
||||
print("========================================= update_model =========================================")
|
||||
# get the latest recorders
|
||||
model_updater = ModelUpdater(experiment_name=exp_name)
|
||||
latest_records, latest_test = model_updater.collect_latest_records()
|
||||
# date adjustment based on trade day of Calendar in Qlib
|
||||
time_adjuster = TimeAdjuster()
|
||||
calendar_latest = time_adjuster.last_date()
|
||||
print("The latest date is ", calendar_latest)
|
||||
if time_adjuster.cal_interval(calendar_latest, latest_test[0]) > rolling_step:
|
||||
print("Need update models!")
|
||||
tasks = {}
|
||||
for rid, rec in latest_records.items():
|
||||
old_task = rec.task
|
||||
test_begin = old_task["dataset"]["kwargs"]["segments"]["test"][0]
|
||||
# modify the test segment to generate new tasks
|
||||
old_task["dataset"]["kwargs"]["segments"]["test"] = (test_begin, calendar_latest)
|
||||
tasks[old_task["task_key"]] = old_task
|
||||
print("========== update_model ==========")
|
||||
rolling_online_manager.prepare_new_models()
|
||||
print_online_model()
|
||||
rolling_online_manager.set_latest_model_to_next_online()
|
||||
print_online_model()
|
||||
|
||||
# retrain the latest model
|
||||
new_tasks = task_generating(**tasks)
|
||||
task_storing(new_tasks)
|
||||
task_running()
|
||||
task_collecting()
|
||||
latest_records, _ = model_updater.collect_latest_records()
|
||||
|
||||
# set the latest model to online model
|
||||
model_updater.reset_online_model(latest_records.values())
|
||||
def after_day():
    """Run the after-trading routine.

    In order: ``prepare_signals()`` on the module-level
    ``rolling_online_manager``, then ``update_model()``, then
    ``update_predictions()``.
    """
    rolling_online_manager.prepare_signals()
    update_model()
    update_predictions()
|
||||
|
||||
|
||||
# Run whole workflow completely
|
||||
def whole_workflow():
|
||||
print("========================================= whole_workflow =========================================")
|
||||
print("========== whole_workflow ==========")
|
||||
# run this at the first time
|
||||
first_run()
|
||||
# run this every day
|
||||
update_predictions()
|
||||
# run this every "rolling_step" days
|
||||
update_model()
|
||||
# run this every day after trading
|
||||
after_day()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
####### to train the first version's models, use the command below
|
||||
# python task_manager_rolling_with_updating.py first_run
|
||||
|
||||
####### to update the models using the latest date and set them to online model, use the command below
|
||||
####### to update the models using the latest date, use the command below
|
||||
# python task_manager_rolling_with_updating.py update_model
|
||||
|
||||
####### to update the predictions to the latest date, use the command below
|
||||
@@ -231,8 +213,8 @@ if __name__ == "__main__":
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||
|
||||
C["mongo"] = {
|
||||
"task_url": "mongodb://localhost:27017/", # your MongoDB url
|
||||
"task_db_name": "rolling_db", # database name
|
||||
"task_url": "mongodb://10.0.0.4:27017/", # your MongoDB url
|
||||
"task_db_name": "online", # database name
|
||||
}
|
||||
|
||||
exp_name = "rolling_exp" # experiment name, will be used as the experiment in MLflow
|
||||
@@ -240,5 +222,9 @@ if __name__ == "__main__":
|
||||
rolling_step = 550
|
||||
|
||||
##########################################################################################
|
||||
|
||||
rolling_gen = RollingGen(step=550, rtype=RollingGen.ROLL_SD)
|
||||
rolling_online_manager = RollingOnlineManager(
|
||||
experiment_name=exp_name, rolling_gen=rolling_gen, task_pool=task_pool
|
||||
)
|
||||
task_manager = TaskManager(task_pool=task_pool)
|
||||
fire.Fire()
|
||||
|
||||
Reference in New Issue
Block a user