1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Collect all contrib models in __init__ and add unit tests for init

This commit is contained in:
D-X-Y
2021-03-28 10:39:28 +00:00
parent 8a2e7b62af
commit 0386df7b16
17 changed files with 115 additions and 46 deletions

View File

@@ -0,0 +1,39 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Each optional backend is imported defensively. When a required module
# cannot be found, the exported class name is bound to None and an install
# hint is printed, so importing this package does not fail outright.

# CatBoost-based model.
try:
    from .catboost_model import CatBoostModel
except ModuleNotFoundError:
    CatBoostModel = None
    print("Please install necessary libs for CatBoostModel.")

# Models guarded together: a failure of either import disables both names.
try:
    from .double_ensemble import DEnsembleModel
    from .gbdt import LGBModel
except ModuleNotFoundError:
    DEnsembleModel = LGBModel = None
    print("Please install necessary libs for DEnsembleModel and LGBModel, such as lightgbm.")

# XGBoost-based model.
try:
    from .xgboost import XGBModel
except ModuleNotFoundError:
    XGBModel = None
    print("Please install necessary libs for XGBModel, such as xgboost.")

# Linear model.
try:
    from .linear import LinearModel
except ModuleNotFoundError:
    LinearModel = None
    print("Please install necessary libs for LinearModel, such as scipy and sklearn.")

# PyTorch models are treated as one group: if any of these imports raises
# ModuleNotFoundError the whole group is dropped from ``all_model_classes``.
# NOTE: unlike the names above, the individual PyTorch class names are not
# rebound to None on failure; only ``pytorch_classes`` is defined.
try:
    from .pytorch_alstm import ALSTM
    from .pytorch_gats import GATs
    from .pytorch_gru import GRU
    from .pytorch_lstm import LSTM
    from .pytorch_nn import DNNModelPytorch
    from .pytorch_tabnet import TabnetModel
    from .pytorch_sfm import SFM_Model
except ModuleNotFoundError:
    pytorch_classes = ()
    print("Please install necessary libs for PyTorch models.")
else:
    pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model)

# Tuple of every collected model class; entries for unavailable non-PyTorch
# backends are None, so consumers must skip None values.
all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes

View File

@@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
from typing import Text, Union
from catboost import Pool, CatBoost
from catboost.utils import get_gpu_device_count
@@ -62,10 +63,10 @@ class CatBoostModel(Model):
evals_result["train"] = list(evals_result["learn"].values())[0]
evals_result["valid"] = list(evals_result["validation"].values())[0]
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(x_test.values), index=x_test.index)

View File

@@ -4,7 +4,7 @@
import lightgbm as lgb
import numpy as np
import pandas as pd
from typing import Text, Union
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -40,6 +40,10 @@ class DEnsembleModel(Model):
self.bins_sr = bins_sr
self.bins_fs = bins_fs
self.decay = decay
if sample_ratios is None: # the default values for sample_ratios
sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4]
if sub_weights is None: # the default values for sub_weights
sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2]
if not len(sample_ratios) == bins_fs:
raise ValueError("The length of sample_ratios should be equal to bins_fs.")
self.sample_ratios = sample_ratios
@@ -228,10 +232,10 @@ class DEnsembleModel(Model):
raise ValueError("not implemented yet")
return loss_curve
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.ensemble is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index)
for i_sub, submodel in enumerate(self.ensemble):
feat_sub = self.sub_features[i_sub]

View File

@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import lightgbm as lgb
from typing import Text, Union
from ...model.base import ModelFT
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -61,7 +61,7 @@ class LGBModel(ModelFT):
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]
def predict(self, dataset, segment="test"):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)

View File

@@ -3,7 +3,7 @@
import numpy as np
import pandas as pd
from typing import Text, Union
from scipy.optimize import nnls
from sklearn.linear_model import LinearRegression, Ridge, Lasso
@@ -84,7 +84,7 @@ class LinearModel(Model):
self.coef_ = coef
self.intercept_ = 0.0
def predict(self, dataset, segment="test"):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.coef_ is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)

View File

@@ -8,9 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
@@ -273,11 +273,11 @@ class ALSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.ALSTM_model.eval()
x_values = x_test.values

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
@@ -264,11 +265,11 @@ class ALSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare(segment, col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
self.ALSTM_model.eval()

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
@@ -83,7 +84,6 @@ class GATs(Model):
self.with_pretrain = with_pretrain
self.model_path = model_path
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.use_gpu = torch.cuda.is_available()
self.seed = seed
self.logger.info(
@@ -310,11 +310,11 @@ class GATs(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature")
index = x_test.index
self.GAT_model.eval()
x_values = x_test.values

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
@@ -273,11 +274,11 @@ class GRU(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.gru_model.eval()
x_values = x_test.values

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
@@ -268,11 +269,11 @@ class LSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.lstm_model.eval()
x_values = x_test.values
@@ -280,17 +281,13 @@ class LSTM(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:
end = begin + self.batch_size
x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
with torch.no_grad():
pred = self.lstm_model(x_batch).detach().cpu().numpy()
preds.append(pred)
return pd.Series(np.concatenate(preds), index=index)

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
from sklearn.metrics import roc_auc_score, mean_squared_error
import torch
@@ -48,8 +49,8 @@ class DNNModelPytorch(Model):
def __init__(
self,
input_dim,
output_dim,
input_dim=360,
output_dim=1,
layers=(256,),
lr=0.001,
max_steps=300,
@@ -271,13 +272,12 @@ class DNNModelPytorch(Model):
else:
raise NotImplementedError("loss {} is not supported!".format(loss_type))
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test_pd = dataset.prepare("test", col_set="feature")
x_test_pd = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = torch.from_numpy(x_test_pd.values).float().to(self.device)
self.dnn_model.eval()
with torch.no_grad():
preds = self.dnn_model(x_test).detach().cpu().numpy()
return pd.Series(np.squeeze(preds), index=x_test_pd.index)

View File

@@ -7,10 +7,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
@@ -442,11 +441,11 @@ class SFM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.sfm_model.eval()
x_values = x_test.values
@@ -459,10 +458,7 @@ class SFM(Model):
else:
end = begin + self.batch_size
x_batch = torch.from_numpy(x_values[begin:end]).float()
if self.device != "cpu":
x_batch = x_batch.to(self.device)
x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
with torch.no_grad():
pred = self.sfm_model(x_batch).detach().cpu().numpy()

View File

@@ -6,10 +6,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
@@ -217,11 +216,11 @@ class TabnetModel(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.tabnet_model.eval()
x_values = torch.from_numpy(x_test.values)

View File

@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import xgboost as xgb
from typing import Text, Union
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -57,8 +57,8 @@ class XGBModel(Model):
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]
def predict(self, dataset, segment="test"):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare(segment, col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)

0
qlib/data/dataset/processor.py Executable file → Normal file
View File

View File

@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import abc
from typing import Text, Union
from ..utils.serial import Serializable
from ..data.dataset import Dataset
@@ -59,7 +60,7 @@ class Model(BaseModel):
raise NotImplementedError()
@abc.abstractmethod
def predict(self, dataset: Dataset) -> object:
def predict(self, dataset: Dataset, segment: Union[Text, slice] = "test") -> object:
"""give prediction given Dataset
Parameters
@@ -67,6 +68,9 @@ class Model(BaseModel):
dataset : Dataset
dataset will generate the processed dataset from model training.
segment : Text or slice
dataset will use this segment to prepare data. (default=test)
Returns
-------
Prediction results with certain type such as `pandas.Series`.

View File

@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import unittest
from qlib.contrib.model import all_model_classes
class TestAllFlow(unittest.TestCase):
    """Smoke test that the collected contrib models can be constructed."""

    def test_0_initialize(self):
        """Instantiate every non-None entry of ``all_model_classes`` with no arguments.

        Any exception raised by a constructor propagates and fails the test.
        On success, the number of constructed models and the total number of
        entries are printed.
        """
        # None entries mark models whose import failed; they are skipped.
        available = [candidate for candidate in all_model_classes if candidate is not None]
        for candidate in available:
            candidate()
        print("There are {:}/{:} valid models in total.".format(len(available), len(all_model_classes)))
def suite():
    """Return a ``unittest.TestSuite`` holding only ``TestAllFlow.test_0_initialize``."""
    collected = unittest.TestSuite()
    collected.addTest(TestAllFlow("test_0_initialize"))
    return collected
if __name__ == "__main__":
    # When executed as a script, run the hand-built suite with the text runner.
    unittest.TextTestRunner().run(suite())