From 900e284696d205249f56a41d930332ddf8bccc80 Mon Sep 17 00:00:00 2001 From: Young Date: Fri, 25 Sep 2020 04:39:59 +0000 Subject: [PATCH] make the integration simpler --- docs/start/integration.rst | 28 ++++++++++++++-------------- qlib/contrib/estimator/estimator.py | 5 ++++- qlib/contrib/estimator/trainer.py | 8 +++++--- tests/test_all_pipeline.py | 5 ++++- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/docs/start/integration.rst b/docs/start/integration.rst index 614c82457..2732f61df 100644 --- a/docs/start/integration.rst +++ b/docs/start/integration.rst @@ -72,20 +72,6 @@ The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/a raise ValueError('model is not fitted yet!') return self._model.predict(x_test.values) -- Override the `score` method - - The parameters include the test features and test labels. - - Return the evaluation score of the model. It's recommended to adopt the loss between labels and `prediction score`. - - Code Example: In the following example, users need to calculate the weighted loss with test data `x_test`, test label `y_test` and the weight `w_test`. - .. code-block:: Python - - def score(self, x_test:pd.Dataframe, y_test:pd.Dataframe, w_test:pd.DataFrame = None) -> float: - # Remove rows from x, y and w, which contain Nan in any columns in y_test. - x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test) - preds = self.predict(x_test) - w_test_weight = None if w_test is None else w_test.values - scorer = mean_squared_error if self.loss_type == 'mse' else roc_auc_score - return scorer(y_test.values, preds, sample_weight=w_test_weight) - - Override the `save` method & `load` method - The `save` method parameter includes the a `filename` that represents an absolute path, user need to save model into the path. - The `load` method parameter includes the a `buffer` read from the `filename` passed in the `save` method, users need to load model from the `buffer`. 
@@ -100,6 +86,20 @@ The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/a def load(self, buffer): self._model = lgb.Booster(params={'model_str': buffer.decode('utf-8')}) +.. Without the tuner, this part will not be used +.. - Override the `score` method (This step is optional) +.. - The parameters include the test features and test labels. +.. - Return the evaluation score of the model. It's recommended to adopt the loss between labels and `prediction score`. +.. - Code Example: In the following example, users need to calculate the weighted loss with test data `x_test`, test label `y_test` and the weight `w_test`. +.. .. code-block:: Python +.. +.. def score(self, x_test:pd.DataFrame, y_test:pd.DataFrame, w_test:pd.DataFrame = None) -> float: +.. # Remove rows from x, y and w, which contain Nan in any columns in y_test. +.. x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test) +.. preds = self.predict(x_test) +.. w_test_weight = None if w_test is None else w_test.values +.. scorer = mean_squared_error if self.loss_type == 'mse' else roc_auc_score +.. return scorer(y_test.values, preds, sample_weight=w_test_weight) Configuration File ======================= diff --git a/qlib/contrib/estimator/estimator.py b/qlib/contrib/estimator/estimator.py index 3a7dce438..1cb69e7ca 100644 --- a/qlib/contrib/estimator/estimator.py +++ b/qlib/contrib/estimator/estimator.py @@ -133,7 +133,10 @@ class Estimator(object): TimeInspector.set_time_mark() # 1. Get pred and prediction score of model(s). pred = self.trainer.get_test_score() - performance = self.trainer.get_test_performance() + try: + performance = self.trainer.get_test_performance() + except NotImplementedError: + performance = None # 2. Normal Backtest. report_normal, positions_normal = self._normal_backtest(pred) # 3. Long-Short Backtest. 
diff --git a/qlib/contrib/estimator/trainer.py b/qlib/contrib/estimator/trainer.py index d19051de9..6cb57f702 100644 --- a/qlib/contrib/estimator/trainer.py +++ b/qlib/contrib/estimator/trainer.py @@ -61,12 +61,11 @@ class BaseTrainer(object): """ pass - @abstractmethod def get_test_performance(self): """ Implement this method indicating how to get the performance of the model. """ - pass + raise NotImplementedError(f"Please implement `get_test_performance`") def get_test_score(self): """ @@ -164,7 +163,10 @@ class StaticTrainer(BaseTrainer): return pred def get_test_performance(self): - model_score = self.model.score(self.x_test, self.y_test) + try: + model_score = self.model.score(self.x_test, self.y_test) + except NotImplementedError: + model_score = None # Remove rows from x, y and w, which contain Nan in any columns in y_test. x_test, y_test, __ = drop_nan_by_y_index(self.x_test, self.y_test) pred_test = self.model.predict(x_test) diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index e30d10774..d7d497301 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -92,7 +92,10 @@ def train(): pred_score["score"] = _pred.iloc(axis=1)[0] # get performance - model_score = model.score(x_test, y_test) + try: + model_score = model.score(x_test, y_test) + except NotImplementedError: + model_score = None # Remove rows from x, y and w, which contain Nan in any columns in y_test. x_test, y_test, __ = drop_nan_by_y_index(x_test, y_test) pred_test = model.predict(x_test)