from pprint import pprint import fire import qlib from qlib.config import REG_CN from qlib.model.trainer import task_train from qlib.workflow import R from qlib.workflow.task.collect import RollingCollector from qlib.workflow.task.gen import RollingGen, task_generator from qlib.workflow.task.manage import TaskManager, run_task data_handler_config = { "start_time": "2008-01-01", "end_time": "2020-08-01", "fit_start_time": "2008-01-01", "fit_end_time": "2014-12-31", "instruments": "csi100", } dataset_config = { "class": "DatasetH", "module_path": "qlib.data.dataset", "kwargs": { "handler": { "class": "Alpha158", "module_path": "qlib.contrib.data.handler", "kwargs": data_handler_config, }, "segments": { "train": ("2008-01-01", "2014-12-31"), "valid": ("2015-01-01", "2016-12-31"), "test": ("2017-01-01", "2020-08-01"), }, }, } record_config = [ { "class": "SignalRecord", "module_path": "qlib.workflow.record_temp", }, { "class": "SigAnaRecord", "module_path": "qlib.workflow.record_temp", }, ] # use lgb task_lgb_config = { "model": { "class": "LGBModel", "module_path": "qlib.contrib.model.gbdt", }, "dataset": dataset_config, "record": record_config, } # use xgboost task_xgboost_config = { "model": { "class": "XGBModel", "module_path": "qlib.contrib.model.xgboost", }, "dataset": dataset_config, "record": record_config, } # Reset all things to the first status, be careful to save important data def reset(task_pool, exp_name): print("========== reset ==========") TaskManager(task_pool=task_pool).remove() exp, _ = R.exp_manager._get_or_create_exp(experiment_name=exp_name) for rid in exp.list_recorders(): exp.delete_recorder(rid) # This part corresponds to "Task Generating" in the document def task_generating(): print("========== task_generating ==========") tasks = task_generator( tasks=[task_xgboost_config, task_lgb_config], generators=RollingGen(step=550, rtype=RollingGen.ROLL_SD), # generate different date segment ) pprint(tasks) return tasks # This part corresponds to "Task Storing" in the document def task_storing(tasks, task_pool, exp_name): print("========== task_storing ==========") tm = TaskManager(task_pool=task_pool) tm.create_task(tasks) # all tasks will be saved to MongoDB # This part corresponds to "Task Running" in the document def task_running(task_pool, exp_name): print("========== task_running ==========") run_task(task_train, task_pool, experiment_name=exp_name) # all tasks will be trained using "task_train" method # This part corresponds to "Task Collecting" in the document def task_collecting(task_pool, exp_name): print("========== task_collecting ==========") def get_group_key_func(recorder): task_config = recorder.load_object("task") return task_config["model"]["class"] def my_filter(recorder): # only choose the results of "LGBModel" task_key = get_group_key_func(recorder) if task_key == "LGBModel": return True return False rolling_collector = RollingCollector(exp_name) # group tasks by "get_task_key" and filter tasks by "my_filter" pred_rolling = rolling_collector.collect(get_group_key_func, my_filter) print(pred_rolling) def main( provider_uri="~/.qlib/qlib_data/cn_data", task_url="mongodb://10.0.0.4:27017/", task_db_name="rolling_db", exp_name="rolling_exp", task_pool="rolling_task", ): mongo_conf = { "task_url": task_url, "task_db_name": task_db_name, } qlib.init(provider_uri=provider_uri, region=REG_CN, mongo=mongo_conf) reset(task_pool, exp_name) tasks = task_generating() task_storing(tasks, task_pool, exp_name) task_running(task_pool, exp_name) task_collecting(task_pool, exp_name) if __name__ == "__main__": fire.Fire()