mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-03 11:00:57 +08:00
online serving v10
This commit is contained in:
@@ -7,6 +7,7 @@ Ensemble can merge the objects in an Ensemble. For example, if there are many su
|
||||
|
||||
from typing import Union
|
||||
import pandas as pd
|
||||
from qlib.utils import flatten_dict
|
||||
|
||||
|
||||
class Ensemble:
|
||||
@@ -77,19 +78,22 @@ class RollingEnsemble(Ensemble):
|
||||
class AverageEnsemble(Ensemble):
|
||||
def __call__(self, ensemble_dict: dict):
|
||||
"""
|
||||
Average a dict of same shape dataframe like `prediction` or `IC` into an ensemble.
|
||||
Average and standardize a dict of same shape dataframe like `prediction` or `IC` into an ensemble.
|
||||
|
||||
NOTE: The values of dict must be pd.DataFrame, and have the index "datetime"
|
||||
NOTE: The values of dict must be pd.DataFrame, and have the index "datetime". If it is a nested dict, it will be flattened first.
|
||||
|
||||
Args:
|
||||
ensemble_dict (dict): a dict like {"A": pd.DataFrame, "B": pd.DataFrame}.
|
||||
The key of the dict will be ignored.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: the complete result of averaging.
|
||||
pd.DataFrame: the complete result of averaging and standardizing.
|
||||
"""
|
||||
# need to flatten the nested dict
|
||||
ensemble_dict = flatten_dict(ensemble_dict)
|
||||
values = list(ensemble_dict.values())
|
||||
results = pd.concat(values, axis=1)
|
||||
results = results.mean(axis=1).to_frame("score")
|
||||
results = results.groupby("datetime").apply(lambda df: (df - df.mean()) / df.std())
|
||||
results = results.mean(axis=1)
|
||||
results = results.sort_index()
|
||||
return results
|
||||
|
||||
@@ -36,20 +36,36 @@ class Group:
|
||||
self._ens_func = ens
|
||||
|
||||
def group(self, *args, **kwargs) -> dict:
|
||||
# TODO: such design is weird when `_group_func` is the only configurable part in the class
|
||||
"""
|
||||
Group a set of objects and convert them into a dict.
|
||||
|
||||
For example: {(A,B,C1): object, (A,B,C2): object} -> {(A,B): {C1: object, C2: object}}
|
||||
|
||||
Returns:
|
||||
dict: grouped dict
|
||||
"""
|
||||
if isinstance(getattr(self, "_group_func", None), Callable):
|
||||
return self._group_func(*args, **kwargs)
|
||||
else:
|
||||
raise NotImplementedError(f"Please specify valid `group_func`.")
|
||||
|
||||
def reduce(self, *args, **kwargs) -> dict:
|
||||
"""
|
||||
Reduce grouped dict in some way.
|
||||
|
||||
For example: {(A,B): {C1: object, C2: object}} -> {(A,B): object}
|
||||
|
||||
Returns:
|
||||
dict: reduced dict
|
||||
"""
|
||||
if isinstance(getattr(self, "_ens_func", None), Callable):
|
||||
return self._ens_func(*args, **kwargs)
|
||||
else:
|
||||
raise NotImplementedError(f"Please specify valid `_ens_func`.")
|
||||
|
||||
def __call__(self, ungrouped_dict: dict, n_jobs=1, verbose=0, *args, **kwargs) -> dict:
|
||||
"""Group the ungrouped_dict into different groups.
|
||||
"""
|
||||
Group the ungrouped_dict into different groups.
|
||||
|
||||
Args:
|
||||
ungrouped_dict (dict): the ungrouped dict waiting for grouping like {name: things}
|
||||
|
||||
@@ -12,7 +12,6 @@ In ``DelayTrainer``, the first step is only to save some necessary info to model
|
||||
"""
|
||||
|
||||
import socket
|
||||
import time
|
||||
from typing import Callable, List
|
||||
|
||||
from qlib.data.dataset import Dataset
|
||||
@@ -145,12 +144,6 @@ class Trainer:
|
||||
"""
|
||||
return self.delay
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset the Trainer status.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class TrainerR(Trainer):
|
||||
"""
|
||||
@@ -160,42 +153,52 @@ class TrainerR(Trainer):
|
||||
Assumption: models were defined by `task` and the results will be saved to `Recorder`
|
||||
"""
|
||||
|
||||
def __init__(self, experiment_name: str, train_func: Callable = task_train):
|
||||
# These tags help you distinguish whether the Recorder has finished training
|
||||
STATUS_KEY = "train_status"
|
||||
STATUS_BEGIN = "begin_task_train"
|
||||
STATUS_END = "end_task_train"
|
||||
|
||||
def __init__(self, experiment_name: str = None, train_func: Callable = task_train):
|
||||
"""
|
||||
Init TrainerR.
|
||||
|
||||
Args:
|
||||
experiment_name (str): the name of experiment.
|
||||
experiment_name (str, optional): the default name of experiment.
|
||||
train_func (Callable, optional): default training method. Defaults to `task_train`.
|
||||
"""
|
||||
super().__init__()
|
||||
self.experiment_name = experiment_name
|
||||
self.train_func = train_func
|
||||
|
||||
def train(self, tasks: list, train_func: Callable = None, **kwargs) -> List[Recorder]:
|
||||
def train(self, tasks: list, train_func: Callable = None, experiment_name: str = None, **kwargs) -> List[Recorder]:
|
||||
"""
|
||||
Given a list of `task`s and return a list of trained Recorder. The order can be guaranteed.
|
||||
|
||||
Args:
|
||||
tasks (list): a list of definition based on `task` dict
|
||||
train_func (Callable): the train method, which needs at least a `task` and an `experiment_name`. None for the default training method.
|
||||
experiment_name (str): the experiment name, None to use the default name.
|
||||
kwargs: the params for train_func.
|
||||
|
||||
Returns:
|
||||
list: a list of Recorders
|
||||
"""
|
||||
if len(tasks) == 0:
|
||||
return []
|
||||
if train_func is None:
|
||||
train_func = self.train_func
|
||||
if experiment_name is None:
|
||||
experiment_name = self.experiment_name
|
||||
recs = []
|
||||
for task in tasks:
|
||||
rec = train_func(task, self.experiment_name, **kwargs)
|
||||
rec.set_tags(**{"train_status": "begin_task_train"})
|
||||
rec = train_func(task, experiment_name, **kwargs)
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_BEGIN})
|
||||
recs.append(rec)
|
||||
return recs
|
||||
|
||||
def end_train(self, recs: list, **kwargs) -> list:
|
||||
def end_train(self, recs: list, **kwargs) -> List[Recorder]:
|
||||
for rec in recs:
|
||||
rec.set_tags(**{"train_status": "end_task_train"})
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_END})
|
||||
return recs
|
||||
|
||||
|
||||
@@ -204,12 +207,12 @@ class DelayTrainerR(TrainerR):
|
||||
A delayed implementation based on TrainerR, which means `train` method may only do some preparation and `end_train` method can do the real model fitting.
|
||||
"""
|
||||
|
||||
def __init__(self, experiment_name, train_func=begin_task_train, end_train_func=end_task_train):
|
||||
def __init__(self, experiment_name: str = None, train_func=begin_task_train, end_train_func=end_task_train):
|
||||
"""
|
||||
Init DelayTrainerR.
|
||||
|
||||
Args:
|
||||
experiment_name (str): the name of experiment.
|
||||
experiment_name (str): the default name of experiment.
|
||||
train_func (Callable, optional): default train method. Defaults to `begin_task_train`.
|
||||
end_train_func (Callable, optional): default end_train method. Defaults to `end_task_train`.
|
||||
"""
|
||||
@@ -217,7 +220,7 @@ class DelayTrainerR(TrainerR):
|
||||
self.end_train_func = end_train_func
|
||||
self.delay = True
|
||||
|
||||
def end_train(self, recs, end_train_func=None, **kwargs) -> List[Recorder]:
|
||||
def end_train(self, recs, end_train_func=None, experiment_name: str = None, **kwargs) -> List[Recorder]:
|
||||
"""
|
||||
Given a list of Recorder and return a list of trained Recorder.
|
||||
This class will finish real data loading and model fitting.
|
||||
@@ -225,6 +228,7 @@ class DelayTrainerR(TrainerR):
|
||||
Args:
|
||||
recs (list): a list of Recorder, the tasks have been saved to them
|
||||
end_train_func (Callable, optional): the end_train method, which needs at least a `recorder` and an `experiment_name`. Defaults to None for using self.end_train_func.
|
||||
experiment_name (str): the experiment name, None to use the default name.
|
||||
kwargs: the params for end_train_func.
|
||||
|
||||
Returns:
|
||||
@@ -232,9 +236,13 @@ class DelayTrainerR(TrainerR):
|
||||
"""
|
||||
if end_train_func is None:
|
||||
end_train_func = self.end_train_func
|
||||
if experiment_name is None:
|
||||
experiment_name = self.experiment_name
|
||||
for rec in recs:
|
||||
end_train_func(rec, **kwargs)
|
||||
rec.set_tags(**{"train_status": "end_task_train"})
|
||||
if rec.list_tags()[self.STATUS_KEY] == self.STATUS_END:
|
||||
continue
|
||||
end_train_func(rec, experiment_name, **kwargs)
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_END})
|
||||
return recs
|
||||
|
||||
|
||||
@@ -246,13 +254,18 @@ class TrainerRM(Trainer):
|
||||
Assumption: `task` will be saved to TaskManager and `task` will be fetched and trained from TaskManager
|
||||
"""
|
||||
|
||||
def __init__(self, experiment_name: str, task_pool: str, train_func=task_train):
|
||||
# These tags help you distinguish whether the Recorder has finished training
|
||||
STATUS_KEY = "train_status"
|
||||
STATUS_BEGIN = "begin_task_train"
|
||||
STATUS_END = "end_task_train"
|
||||
|
||||
def __init__(self, experiment_name: str = None, task_pool: str = None, train_func=task_train):
|
||||
"""
|
||||
Init TrainerRM.
|
||||
|
||||
Args:
|
||||
experiment_name (str): the name of experiment.
|
||||
task_pool (str): task pool name in TaskManager.
|
||||
experiment_name (str): the default name of experiment.
|
||||
task_pool (str): task pool name in TaskManager. None to use the same name as experiment_name.
|
||||
train_func (Callable, optional): default training method. Defaults to `task_train`.
|
||||
"""
|
||||
super().__init__()
|
||||
@@ -264,6 +277,7 @@ class TrainerRM(Trainer):
|
||||
self,
|
||||
tasks: list,
|
||||
train_func: Callable = None,
|
||||
experiment_name: str = None,
|
||||
before_status: str = TaskManager.STATUS_WAITING,
|
||||
after_status: str = TaskManager.STATUS_DONE,
|
||||
**kwargs,
|
||||
@@ -277,6 +291,7 @@ class TrainerRM(Trainer):
|
||||
Args:
|
||||
tasks (list): a list of definition based on `task` dict
|
||||
train_func (Callable): the train method which need at least `task`s and `experiment_name`. None for default training method.
|
||||
experiment_name (str): the experiment name, None to use the default name.
|
||||
before_status (str): the tasks in before_status will be fetched and trained. Can be STATUS_WAITING, STATUS_PART_DONE.
|
||||
after_status (str): the status the tasks are set to after training. Can be STATUS_WAITING, STATUS_PART_DONE.
|
||||
kwargs: the params for train_func.
|
||||
@@ -284,14 +299,21 @@ class TrainerRM(Trainer):
|
||||
Returns:
|
||||
list: a list of Recorders
|
||||
"""
|
||||
if len(tasks) == 0:
|
||||
return []
|
||||
if train_func is None:
|
||||
train_func = self.train_func
|
||||
tm = TaskManager(task_pool=self.task_pool)
|
||||
if experiment_name is None:
|
||||
experiment_name = self.experiment_name
|
||||
task_pool = self.task_pool
|
||||
if task_pool is None:
|
||||
task_pool = experiment_name
|
||||
tm = TaskManager(task_pool=task_pool)
|
||||
_id_list = tm.create_task(tasks) # all tasks will be saved to MongoDB
|
||||
run_task(
|
||||
train_func,
|
||||
self.task_pool,
|
||||
experiment_name=self.experiment_name,
|
||||
task_pool,
|
||||
experiment_name=experiment_name,
|
||||
before_status=before_status,
|
||||
after_status=after_status,
|
||||
**kwargs,
|
||||
@@ -300,23 +322,15 @@ class TrainerRM(Trainer):
|
||||
recs = []
|
||||
for _id in _id_list:
|
||||
rec = tm.re_query(_id)["res"]
|
||||
rec.set_tags(**{"train_status": "begin_task_train"})
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_BEGIN})
|
||||
recs.append(rec)
|
||||
return recs
|
||||
|
||||
def end_train(self, recs: list, **kwargs) -> list:
|
||||
for rec in recs:
|
||||
rec.set_tags(**{"train_status": "end_task_train"})
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_END})
|
||||
return recs
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
.. note::
|
||||
this method will delete all task in this task_pool!
|
||||
"""
|
||||
tm = TaskManager(task_pool=self.task_pool)
|
||||
tm.remove()
|
||||
|
||||
|
||||
class DelayTrainerRM(TrainerRM):
|
||||
"""
|
||||
@@ -324,30 +338,57 @@ class DelayTrainerRM(TrainerRM):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, experiment_name, task_pool: str, train_func=begin_task_train, end_train_func=end_task_train):
|
||||
def __init__(
|
||||
self,
|
||||
experiment_name: str = None,
|
||||
task_pool: str = None,
|
||||
train_func=begin_task_train,
|
||||
end_train_func=end_task_train,
|
||||
):
|
||||
"""
|
||||
Init DelayTrainerRM.
|
||||
|
||||
Args:
|
||||
experiment_name (str): the default name of experiment.
|
||||
task_pool (str): task pool name in TaskManager. None to use the same name as experiment_name.
|
||||
train_func (Callable, optional): default train method. Defaults to `begin_task_train`.
|
||||
end_train_func (Callable, optional): default end_train method. Defaults to `end_task_train`.
|
||||
"""
|
||||
super().__init__(experiment_name, task_pool, train_func)
|
||||
self.end_train_func = end_train_func
|
||||
self.delay = True
|
||||
|
||||
def train(self, tasks: list, train_func=None, **kwargs):
|
||||
def train(self, tasks: list, train_func=None, experiment_name: str = None, **kwargs):
|
||||
"""
|
||||
Same as `train` of TrainerRM, after_status will be STATUS_PART_DONE.
|
||||
Args:
|
||||
tasks (list): a list of definition based on `task` dict
|
||||
train_func (Callable): the train method which need at least `task`s and `experiment_name`. Defaults to None for using self.train_func.
|
||||
experiment_name (str): the experiment name, None to use the default name.
|
||||
Returns:
|
||||
list: a list of Recorders
|
||||
"""
|
||||
return super().train(tasks, train_func=train_func, after_status=TaskManager.STATUS_PART_DONE, **kwargs)
|
||||
if len(tasks) == 0:
|
||||
return []
|
||||
return super().train(
|
||||
tasks,
|
||||
train_func=train_func,
|
||||
experiment_name=experiment_name,
|
||||
after_status=TaskManager.STATUS_PART_DONE,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def end_train(self, recs, end_train_func=None, **kwargs):
|
||||
def end_train(self, recs, end_train_func=None, experiment_name: str = None, **kwargs):
|
||||
"""
|
||||
Given a list of Recorder and return a list of trained Recorder.
|
||||
This class will finish real data loading and model fitting.
|
||||
|
||||
NOTE: This method will train all STATUS_PART_DONE tasks in task pool, not only the ``recs``.
|
||||
|
||||
Args:
|
||||
recs (list): a list of Recorder, the tasks have been saved to them.
|
||||
end_train_func (Callable, optional): the end_train method which need at least `recorder`s and `experiment_name`. Defaults to None for using self.end_train_func.
|
||||
experiment_name (str): the experiment name, None to use the default name.
|
||||
kwargs: the params for end_train_func.
|
||||
|
||||
Returns:
|
||||
@@ -356,13 +397,23 @@ class DelayTrainerRM(TrainerRM):
|
||||
|
||||
if end_train_func is None:
|
||||
end_train_func = self.end_train_func
|
||||
if experiment_name is None:
|
||||
experiment_name = self.experiment_name
|
||||
task_pool = self.task_pool
|
||||
if task_pool is None:
|
||||
task_pool = experiment_name
|
||||
tasks = []
|
||||
for rec in recs:
|
||||
tasks.append(rec.load_object("task"))
|
||||
|
||||
run_task(
|
||||
end_train_func,
|
||||
self.task_pool,
|
||||
experiment_name=self.experiment_name,
|
||||
task_pool,
|
||||
tasks=tasks,
|
||||
experiment_name=experiment_name,
|
||||
before_status=TaskManager.STATUS_PART_DONE,
|
||||
**kwargs,
|
||||
)
|
||||
for rec in recs:
|
||||
rec.set_tags(**{"train_status": "end_task_train"})
|
||||
rec.set_tags(**{self.STATUS_KEY: self.STATUS_END})
|
||||
return recs
|
||||
|
||||
Reference in New Issue
Block a user