1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

make the integration simpler

This commit is contained in:
Young
2020-09-25 04:39:59 +00:00
parent 9556d1ce38
commit 900e284696
4 changed files with 27 additions and 19 deletions

View File

@@ -72,20 +72,6 @@ The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/a
raise ValueError('model is not fitted yet!')
return self._model.predict(x_test.values)
- Override the `score` method
- The parameters include the test features and test labels.
- Return the evaluation score of the model. It's recommended to adopt the loss between labels and `prediction score`.
- Code Example: In the following example, users need to calculate the weighted loss with test data `x_test`, test label `y_test` and the weight `w_test`.
.. code-block:: Python
def score(self, x_test:pd.Dataframe, y_test:pd.Dataframe, w_test:pd.DataFrame = None) -> float:
# Remove rows from x, y and w, which contain Nan in any columns in y_test.
x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test)
preds = self.predict(x_test)
w_test_weight = None if w_test is None else w_test.values
scorer = mean_squared_error if self.loss_type == 'mse' else roc_auc_score
return scorer(y_test.values, preds, sample_weight=w_test_weight)
- Override the `save` method & `load` method
- The `save` method takes a `filename` parameter that represents an absolute path; users need to save the model to that path.
- The `load` method takes a `buffer` parameter read from the `filename` passed to the `save` method; users need to load the model from the `buffer`.
@@ -100,6 +86,20 @@ The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/a
def load(self, buffer):
self._model = lgb.Booster(params={'model_str': buffer.decode('utf-8')})
.. Without tuner, this part will not be used
.. - Override the `score` method (this step is optional)
.. - The parameters include the test features and test labels.
.. - Return the evaluation score of the model. It's recommended to adopt the loss between labels and `prediction score`.
.. - Code Example: In the following example, users need to calculate the weighted loss with test data `x_test`, test label `y_test` and the weight `w_test`.
.. .. code-block:: Python
..
.. def score(self, x_test:pd.Dataframe, y_test:pd.Dataframe, w_test:pd.DataFrame = None) -> float:
.. # Remove rows from x, y and w, which contain Nan in any columns in y_test.
.. x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test)
.. preds = self.predict(x_test)
.. w_test_weight = None if w_test is None else w_test.values
.. scorer = mean_squared_error if self.loss_type == 'mse' else roc_auc_score
.. return scorer(y_test.values, preds, sample_weight=w_test_weight)
Configuration File
=======================

View File

@@ -133,7 +133,10 @@ class Estimator(object):
TimeInspector.set_time_mark()
# 1. Get pred and prediction score of model(s).
pred = self.trainer.get_test_score()
performance = self.trainer.get_test_performance()
try:
performance = self.trainer.get_test_performance()
except NotImplementedError:
performance = None
# 2. Normal Backtest.
report_normal, positions_normal = self._normal_backtest(pred)
# 3. Long-Short Backtest.

View File

@@ -61,12 +61,11 @@ class BaseTrainer(object):
"""
pass
@abstractmethod
def get_test_performance(self):
"""
Implement this method indicating how to get the performance of the model.
"""
pass
raise NotImplementedError(f"Please implement `get_test_performance`")
def get_test_score(self):
"""
@@ -164,7 +163,10 @@ class StaticTrainer(BaseTrainer):
return pred
def get_test_performance(self):
model_score = self.model.score(self.x_test, self.y_test)
try:
model_score = self.model.score(self.x_test, self.y_test)
except NotImplementedError:
model_score = None
# Remove rows from x, y and w, which contain Nan in any columns in y_test.
x_test, y_test, __ = drop_nan_by_y_index(self.x_test, self.y_test)
pred_test = self.model.predict(x_test)

View File

@@ -92,7 +92,10 @@ def train():
pred_score["score"] = _pred.iloc(axis=1)[0]
# get performance
model_score = model.score(x_test, y_test)
try:
model_score = model.score(x_test, y_test)
except NotImplementedError:
model_score = None
# Remove rows from x, y and w, which contain Nan in any columns in y_test.
x_test, y_test, __ = drop_nan_by_y_index(x_test, y_test)
pred_test = model.predict(x_test)