mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-01 18:11:18 +08:00
231 lines
6.7 KiB
Python
231 lines
6.7 KiB
Python
import qlib
|
|
import fire
|
|
import mlflow
|
|
from qlib.config import C
|
|
from qlib.workflow import R
|
|
from pprint import pprint
|
|
from qlib.config import REG_CN
|
|
from qlib.model.trainer import task_train
|
|
from qlib.workflow.task.manage import run_task
|
|
from qlib.workflow.task.manage import TaskManager
|
|
from qlib.workflow.task.collect import RollingCollector
|
|
from qlib.workflow.task.gen import RollingGen, task_generator
|
|
from qlib.workflow.task.online import RollingOnlineManager
|
|
|
|
data_handler_config = {
|
|
"start_time": "2013-01-01",
|
|
"end_time": "2020-09-25",
|
|
"fit_start_time": "2013-01-01",
|
|
"fit_end_time": "2014-12-31",
|
|
"instruments": "csi100",
|
|
}
|
|
|
|
dataset_config = {
|
|
"class": "DatasetH",
|
|
"module_path": "qlib.data.dataset",
|
|
"kwargs": {
|
|
"handler": {
|
|
"class": "Alpha158",
|
|
"module_path": "qlib.contrib.data.handler",
|
|
"kwargs": data_handler_config,
|
|
},
|
|
"segments": {
|
|
"train": ("2013-01-01", "2014-12-31"),
|
|
"valid": ("2015-01-01", "2015-12-31"),
|
|
"test": ("2016-01-01", "2020-07-10"),
|
|
},
|
|
},
|
|
}
|
|
|
|
record_config = [
|
|
{
|
|
"class": "SignalRecord",
|
|
"module_path": "qlib.workflow.record_temp",
|
|
},
|
|
{
|
|
"class": "SigAnaRecord",
|
|
"module_path": "qlib.workflow.record_temp",
|
|
},
|
|
]
|
|
|
|
# use lgb model
|
|
task_lgb_config = {
|
|
"model": {
|
|
"class": "LGBModel",
|
|
"module_path": "qlib.contrib.model.gbdt",
|
|
},
|
|
"dataset": dataset_config,
|
|
"record": record_config,
|
|
}
|
|
|
|
# use xgboost model
|
|
task_xgboost_config = {
|
|
"model": {
|
|
"class": "XGBModel",
|
|
"module_path": "qlib.contrib.model.xgboost",
|
|
},
|
|
"dataset": dataset_config,
|
|
"record": record_config,
|
|
}
|
|
|
|
|
|
def print_online_model():
|
|
print("Current 'online' model:")
|
|
for online in rolling_online_manager.list_online_model().values():
|
|
print(online.info["id"])
|
|
print("Current 'next online' model:")
|
|
for online in rolling_online_manager.list_next_online_model().values():
|
|
print(online.info["id"])
|
|
|
|
|
|
# This part corresponds to "Task Generating" in the document
|
|
def task_generating():
|
|
|
|
print("========== task_generating ==========")
|
|
|
|
tasks = task_generator(
|
|
tasks=[task_xgboost_config, task_lgb_config],
|
|
generators=rolling_gen, # generate different date segment
|
|
)
|
|
|
|
pprint(tasks)
|
|
|
|
return tasks
|
|
|
|
|
|
# This part corresponds to "Task Storing" in the document
|
|
def task_storing(tasks):
|
|
print("========== task_storing ==========")
|
|
tm = TaskManager(task_pool=task_pool)
|
|
tm.create_task(tasks) # all tasks will be saved to MongoDB
|
|
|
|
|
|
# This part corresponds to "Task Running" in the document
|
|
def task_running():
|
|
print("========== task_running ==========")
|
|
run_task(task_train, task_pool, experiment_name=exp_name) # all tasks will be trained using "task_train" method
|
|
|
|
|
|
# This part corresponds to "Task Collecting" in the document
|
|
def task_collecting():
|
|
print("========== task_collecting ==========")
|
|
|
|
def get_task_key(task_config):
|
|
return task_config["model"]["class"]
|
|
|
|
def my_filter(recorder):
|
|
# only choose the results of "LGBModel"
|
|
task_key = get_task_key(rolling_collector.get_task(recorder))
|
|
if task_key == "LGBModel":
|
|
return True
|
|
return False
|
|
|
|
rolling_collector = RollingCollector(exp_name)
|
|
# group tasks by "get_task_key" and filter tasks by "my_filter"
|
|
pred_rolling = rolling_collector.collect_rolling_predictions(get_task_key, my_filter)
|
|
print(pred_rolling)
|
|
|
|
|
|
# Reset all things to the first status, be careful to save important data
|
|
def reset(force_end=False):
|
|
print("========== reset ==========")
|
|
task_manager.remove()
|
|
for error in task_manager.query():
|
|
assert False
|
|
exp = R.get_exp(experiment_name=exp_name)
|
|
recs = exp.list_recorders()
|
|
|
|
for rid in recs:
|
|
exp.delete_recorder(rid)
|
|
|
|
try:
|
|
if force_end:
|
|
mlflow.end_run()
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
# Run this firstly to see the workflow in Task Management
|
|
def first_run():
|
|
print("========== first_run ==========")
|
|
reset(force_end=True)
|
|
|
|
tasks = task_generating()
|
|
task_storing(tasks)
|
|
task_running()
|
|
task_collecting()
|
|
|
|
rolling_online_manager.set_latest_model_to_next_online()
|
|
rolling_online_manager.reset_online_model()
|
|
|
|
|
|
# Update the predictions of online model
|
|
def update_predictions():
|
|
print("========== update_predictions ==========")
|
|
rolling_online_manager.update_online_pred()
|
|
task_collecting()
|
|
# if there are some next_online_model, then online them. if no, still use current online_model.
|
|
print_online_model()
|
|
rolling_online_manager.reset_online_model()
|
|
print_online_model()
|
|
|
|
|
|
# Update the models using the latest date and set them to online model
|
|
def update_model():
|
|
print("========== update_model ==========")
|
|
rolling_online_manager.prepare_new_models()
|
|
print_online_model()
|
|
rolling_online_manager.set_latest_model_to_next_online()
|
|
print_online_model()
|
|
|
|
|
|
def after_day():
|
|
rolling_online_manager.prepare_signals()
|
|
update_model()
|
|
update_predictions()
|
|
|
|
|
|
# Run whole workflow completely
|
|
def whole_workflow():
|
|
print("========== whole_workflow ==========")
|
|
# run this at the first time
|
|
first_run()
|
|
# run this every day after trading
|
|
after_day()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
####### to train the first version's models, use the command below
|
|
# python task_manager_rolling_with_updating.py first_run
|
|
|
|
####### to update the models using the latest date, use the command below
|
|
# python task_manager_rolling_with_updating.py update_model
|
|
|
|
####### to update the predictions to the latest date, use the command below
|
|
# python task_manager_rolling_with_updating.py update_predictions
|
|
|
|
####### to run whole workflow completely, use the command below
|
|
# python task_manager_rolling_with_updating.py whole_workflow
|
|
|
|
#################### you need to finish the configurations below #########################
|
|
|
|
provider_uri = "~/.qlib/qlib_data/cn_data" # data_dir
|
|
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
|
|
|
C["mongo"] = {
|
|
"task_url": "mongodb://10.0.0.4:27017/", # your MongoDB url
|
|
"task_db_name": "online", # database name
|
|
}
|
|
|
|
exp_name = "rolling_exp" # experiment name, will be used as the experiment in MLflow
|
|
task_pool = "rolling_task" # task pool name, will be used as the document in MongoDB
|
|
rolling_step = 550
|
|
|
|
##########################################################################################
|
|
rolling_gen = RollingGen(step=550, rtype=RollingGen.ROLL_SD)
|
|
rolling_online_manager = RollingOnlineManager(
|
|
experiment_name=exp_name, rolling_gen=rolling_gen, task_pool=task_pool
|
|
)
|
|
task_manager = TaskManager(task_pool=task_pool)
|
|
fire.Fire()
|