1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Merge pull request #378 from D-X-Y/main

Add MultiSegRecord and add segment kwargs in model.pred
This commit is contained in:
you-n-g
2021-03-29 01:06:41 +08:00
committed by GitHub
27 changed files with 328 additions and 134 deletions

View File

@@ -0,0 +1,39 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Every model family below is optional. When importing a family raises
# ModuleNotFoundError, its public names are bound to None (or, for the PyTorch
# family, ``pytorch_classes`` is left empty) and an installation hint is printed.
try:
    from .catboost_model import CatBoostModel
except ModuleNotFoundError:
    CatBoostModel = None
    print("Please install necessary libs for CatBoostModel.")

try:
    from .double_ensemble import DEnsembleModel
    from .gbdt import LGBModel
except ModuleNotFoundError:
    DEnsembleModel = LGBModel = None
    print("Please install necessary libs for DEnsembleModel and LGBModel, such as lightgbm.")

try:
    from .xgboost import XGBModel
except ModuleNotFoundError:
    XGBModel = None
    print("Please install necessary libs for XGBModel, such as xgboost.")

try:
    from .linear import LinearModel
except ModuleNotFoundError:
    LinearModel = None
    print("Please install necessary libs for LinearModel, such as scipy and sklearn.")

# import pytorch models
try:
    from .pytorch_alstm import ALSTM
    from .pytorch_gats import GATs
    from .pytorch_gru import GRU
    from .pytorch_lstm import LSTM
    from .pytorch_nn import DNNModelPytorch
    from .pytorch_tabnet import TabnetModel
    from .pytorch_sfm import SFM_Model

    pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model)
except ModuleNotFoundError:
    pytorch_classes = ()
    print("Please install necessary libs for PyTorch models.")

# Entries are None for the non-PyTorch models whose import failed above.
all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel, *pytorch_classes)

View File

@@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
from typing import Text, Union
from catboost import Pool, CatBoost
from catboost.utils import get_gpu_device_count
@@ -62,10 +63,10 @@ class CatBoostModel(Model):
evals_result["train"] = list(evals_result["learn"].values())[0]
evals_result["valid"] = list(evals_result["validation"].values())[0]
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(x_test.values), index=x_test.index)

View File

@@ -4,7 +4,7 @@
import lightgbm as lgb
import numpy as np
import pandas as pd
from typing import Text, Union
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -40,6 +40,10 @@ class DEnsembleModel(Model):
self.bins_sr = bins_sr
self.bins_fs = bins_fs
self.decay = decay
if sample_ratios is None: # the default values for sample_ratios
sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4]
if sub_weights is None: # the default values for sub_weights
sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2]
if not len(sample_ratios) == bins_fs:
raise ValueError("The length of sample_ratios should be equal to bins_fs.")
self.sample_ratios = sample_ratios
@@ -228,10 +232,10 @@ class DEnsembleModel(Model):
raise ValueError("not implemented yet")
return loss_curve
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.ensemble is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index)
for i_sub, submodel in enumerate(self.ensemble):
feat_sub = self.sub_features[i_sub]

View File

@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import lightgbm as lgb
from typing import Text, Union
from ...model.base import ModelFT
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -61,10 +61,10 @@ class LGBModel(ModelFT):
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):

View File

@@ -3,7 +3,7 @@
import numpy as np
import pandas as pd
from typing import Text, Union
from scipy.optimize import nnls
from sklearn.linear_model import LinearRegression, Ridge, Lasso
@@ -84,8 +84,8 @@ class LinearModel(Model):
self.coef_ = coef
self.intercept_ = 0.0
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.coef_ is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(x_test.values @ self.coef_ + self.intercept_, index=x_test.index)

View File

@@ -8,13 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -273,11 +269,11 @@ class ALSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.ALSTM_model.eval()
x_values = x_test.values

View File

@@ -8,13 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -264,11 +260,11 @@ class ALSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test = dataset.prepare(segment, col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
dl_test.config(fillna_type="ffill+bfill")
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
self.ALSTM_model.eval()

View File

@@ -8,13 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
import torch.nn as nn
@@ -83,7 +79,6 @@ class GATs(Model):
self.with_pretrain = with_pretrain
self.model_path = model_path
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
self.use_gpu = torch.cuda.is_available()
self.seed = seed
self.logger.info(
@@ -310,11 +305,11 @@ class GATs(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature")
index = x_test.index
self.GAT_model.eval()
x_values = x_test.values

View File

@@ -9,12 +9,7 @@ import os
import numpy as np
import pandas as pd
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
import torch.nn as nn

View File

@@ -8,13 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -273,11 +269,11 @@ class GRU(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.gru_model.eval()
x_values = x_test.values

View File

@@ -9,12 +9,7 @@ import os
import numpy as np
import pandas as pd
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch

View File

@@ -8,13 +8,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -268,11 +264,11 @@ class LSTM(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.lstm_model.eval()
x_values = x_test.values
@@ -280,17 +276,13 @@ class LSTM(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:
end = begin + self.batch_size
x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
with torch.no_grad():
pred = self.lstm_model(x_batch).detach().cpu().numpy()
preds.append(pred)
return pd.Series(np.concatenate(preds), index=index)

View File

@@ -9,12 +9,7 @@ import os
import numpy as np
import pandas as pd
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch

View File

@@ -8,6 +8,7 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
from sklearn.metrics import roc_auc_score, mean_squared_error
import torch
@@ -18,7 +19,7 @@ from .pytorch_utils import count_parameters
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, get_or_create_path, drop_nan_by_y_index
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, get_or_create_path
from ...log import get_module_logger
from ...workflow import R
@@ -48,8 +49,8 @@ class DNNModelPytorch(Model):
def __init__(
self,
input_dim,
output_dim,
input_dim=360,
output_dim=1,
layers=(256,),
lr=0.001,
max_steps=300,
@@ -271,13 +272,12 @@ class DNNModelPytorch(Model):
else:
raise NotImplementedError("loss {} is not supported!".format(loss_type))
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test_pd = dataset.prepare("test", col_set="feature")
x_test_pd = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = torch.from_numpy(x_test_pd.values).float().to(self.device)
self.dnn_model.eval()
with torch.no_grad():
preds = self.dnn_model(x_test).detach().cpu().numpy()
return pd.Series(np.squeeze(preds), index=x_test_pd.index)

View File

@@ -7,13 +7,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -442,11 +438,11 @@ class SFM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.sfm_model.eval()
x_values = x_test.values
@@ -459,10 +455,7 @@ class SFM(Model):
else:
end = begin + self.batch_size
x_batch = torch.from_numpy(x_values[begin:end]).float()
if self.device != "cpu":
x_batch = x_batch.to(self.device)
x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
with torch.no_grad():
pred = self.sfm_model(x_batch).detach().cpu().numpy()

View File

@@ -6,13 +6,9 @@ from __future__ import print_function
import os
import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
get_or_create_path,
drop_nan_by_y_index,
)
from ...utils import get_or_create_path
from ...log import get_module_logger
import torch
@@ -217,11 +213,11 @@ class TabnetModel(Model):
if self.use_gpu:
torch.cuda.empty_cache()
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
index = x_test.index
self.tabnet_model.eval()
x_values = torch.from_numpy(x_test.values)

View File

@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import xgboost as xgb
from typing import Text, Union
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
@@ -57,8 +57,8 @@ class XGBModel(Model):
evals_result["train"] = list(evals_result["train"].values())[0]
evals_result["valid"] = list(evals_result["valid"].values())[0]
def predict(self, dataset):
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)

View File

@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from .record_temp import MultiSegRecord
from .record_temp import SignalMseRecord

View File

@@ -1,18 +1,59 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import re
import pandas as pd
from sklearn.metrics import mean_squared_error
from pprint import pprint
from typing import Dict, Text, Any
import numpy as np
from ...contrib.eva.alpha import calc_ic
from ...workflow.record_temp import RecordTemp
from ...workflow.record_temp import SignalRecord
from ...data import dataset as qlib_dataset
from ...log import get_module_logger
logger = get_module_logger("workflow", "INFO")
class MultiSegRecord(RecordTemp):
    """
    This is the multiple segments signal record class that generates the signal prediction.
    This class inherits the ``RecordTemp`` class.

    For every requested segment the model prediction is compared with the labels of the
    same segment through ``calc_ic``; the IC / Rank IC statistics are logged and can
    optionally be saved through the recorder.
    """

    def __init__(self, model, dataset, recorder=None):
        """
        Parameters
        ----------
        model :
            object whose ``predict(dataset, segment)`` is called in ``generate``.
        dataset : DatasetH
            dataset used both for prediction and for preparing the labels.
        recorder :
            forwarded to ``RecordTemp``; only accessed by ``generate`` when ``save=True``.

        Raises
        ------
        ValueError
            if ``dataset`` is not an instance of ``DatasetH``.
        """
        super().__init__(recorder=recorder)
        if not isinstance(dataset, qlib_dataset.DatasetH):
            raise ValueError("The type of dataset should be DatasetH instead of {:}".format(type(dataset)))
        self.model = model
        self.dataset = dataset

    def generate(self, segments: Dict[Text, Any], save: bool = False):
        """
        Evaluate the model on each segment and log IC / Rank IC statistics.

        Parameters
        ----------
        segments : Dict[Text, Any]
            maps a display name to the segment handed to ``model.predict`` and
            ``dataset.prepare``.
        save : bool
            when True, the per-segment results are saved with
            ``recorder.save_objects`` under the name ``results-<key>.pkl``.
        """
        for key, segment in segments.items():
            predics = self.model.predict(self.dataset, segment)
            if isinstance(predics, pd.Series):
                # Only the first column is used below, so wrap a Series into a one-column frame.
                predics = predics.to_frame("score")
            labels = self.dataset.prepare(
                segments=segment, col_set="label", data_key=qlib_dataset.handler.DataHandlerLP.DK_R
            )
            # Compute the IC and Rank IC
            ic, ric = calc_ic(predics.iloc[:, 0], labels.iloc[:, 0])
            results = {"all-IC": ic, "mean-IC": ic.mean(), "all-Rank-IC": ric, "mean-Rank-IC": ric.mean()}
            logger.info("--- Results for {:} ({:}) ---".format(key, segment))
            ic_x100, ric_x100 = ic * 100, ric * 100
            logger.info("IC: {:.4f}%".format(ic_x100.mean()))
            logger.info("ICIR: {:.4f}%".format(ic_x100.mean() / ic_x100.std()))
            logger.info("Rank IC: {:.4f}%".format(ric_x100.mean()))
            logger.info("Rank ICIR: {:.4f}%".format(ric_x100.mean() / ric_x100.std()))

            if save:
                save_name = "results-{:}.pkl".format(key)
                self.recorder.save_objects(**{save_name: results})
                # The placeholders must actually be substituted; the message used to be
                # logged verbatim with its braces because the string was never formatted.
                logger.info(
                    "The record '{:}' has been saved as the artifact of the Experiment {:}".format(
                        save_name, self.recorder.experiment_id
                    )
                )
class SignalMseRecord(SignalRecord):
"""
This is the Signal MSE Record class that computes the mean squared error (MSE).
@@ -38,7 +79,7 @@ class SignalMseRecord(SignalRecord):
objects = {"mse.pkl": mse, "rmse.pkl": np.sqrt(mse)}
self.recorder.log_metrics(**metrics)
self.recorder.save_objects(**objects, artifact_path=self.get_path())
pprint(metrics)
logger.info("The evaluation results in SignalMseRecord is {:}".format(metrics))
def list(self):
paths = [self.get_path("mse.pkl"), self.get_path("rmse.pkl")]

0
qlib/data/dataset/processor.py Executable file → Normal file
View File

View File

@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import abc
from typing import Text, Union
from ..utils.serial import Serializable
from ..data.dataset import Dataset
@@ -59,7 +60,7 @@ class Model(BaseModel):
raise NotImplementedError()
@abc.abstractmethod
def predict(self, dataset: Dataset) -> object:
def predict(self, dataset: Dataset, segment: Union[Text, slice] = "test") -> object:
"""give prediction given Dataset
Parameters
@@ -67,6 +68,9 @@ class Model(BaseModel):
dataset : Dataset
dataset will generate the processed dataset from model training.
segment : Text or slice
dataset will use this segment to prepare data. (default=test)
Returns
-------
Prediction results with certain type such as `pandas.Series`.

View File

@@ -159,7 +159,10 @@ class Experiment:
if create:
recorder, is_new = self._get_or_create_rec(recorder_id=recorder_id, recorder_name=recorder_name)
else:
recorder, is_new = self._get_recorder(recorder_id=recorder_id, recorder_name=recorder_name), False
recorder, is_new = (
self._get_recorder(recorder_id=recorder_id, recorder_name=recorder_name),
False,
)
if is_new:
self.active_recorder = recorder
# start the recorder
@@ -174,7 +177,10 @@ class Experiment:
try:
if recorder_id is None and recorder_name is None:
recorder_name = self._default_rec_name
return self._get_recorder(recorder_id=recorder_id, recorder_name=recorder_name), False
return (
self._get_recorder(recorder_id=recorder_id, recorder_name=recorder_name),
False,
)
except ValueError:
if recorder_name is None:
recorder_name = self._default_rec_name

View File

@@ -159,7 +159,10 @@ class ExpManager:
if create:
exp, is_new = self._get_or_create_exp(experiment_id=experiment_id, experiment_name=experiment_name)
else:
exp, is_new = self._get_exp(experiment_id=experiment_id, experiment_name=experiment_name), False
exp, is_new = (
self._get_exp(experiment_id=experiment_id, experiment_name=experiment_name),
False,
)
if is_new:
self.active_experiment = exp
# start the recorder
@@ -172,7 +175,10 @@ class ExpManager:
automatically create a new experiment based on the given id and name.
"""
try:
return self._get_exp(experiment_id=experiment_id, experiment_name=experiment_name), False
return (
self._get_exp(experiment_id=experiment_id, experiment_name=experiment_name),
False,
)
except ValueError:
if experiment_name is None:
experiment_name = self._default_exp_name

View File

@@ -39,7 +39,13 @@ class RecordTemp:
return "/".join(names)
def __init__(self, recorder):
self.recorder = recorder
self._recorder = recorder
@property
def recorder(self):
if self._recorder is None:
raise ValueError("This RecordTemp did not set recorder yet.")
return self._recorder
def generate(self, **kwargs):
"""
@@ -248,11 +254,20 @@ class PortAnaRecord(SignalRecord):
report_dict = normal_backtest(pred_score, strategy=self.strategy, **self.backtest_config)
report_normal = report_dict.get("report_df")
positions_normal = report_dict.get("positions")
self.recorder.save_objects(**{"report_normal.pkl": report_normal}, artifact_path=PortAnaRecord.get_path())
self.recorder.save_objects(**{"positions_normal.pkl": positions_normal}, artifact_path=PortAnaRecord.get_path())
self.recorder.save_objects(
**{"report_normal.pkl": report_normal},
artifact_path=PortAnaRecord.get_path(),
)
self.recorder.save_objects(
**{"positions_normal.pkl": positions_normal},
artifact_path=PortAnaRecord.get_path(),
)
order_normal = report_dict.get("order_list")
if order_normal:
self.recorder.save_objects(**{"order_normal.pkl": order_normal}, artifact_path=PortAnaRecord.get_path())
self.recorder.save_objects(
**{"order_normal.pkl": order_normal},
artifact_path=PortAnaRecord.get_path(),
)
# analysis
analysis = dict()

View File

@@ -6,24 +6,11 @@ import shutil
import unittest
from pathlib import Path
import numpy as np
import pandas as pd
import qlib
from qlib.config import REG_CN, C
from qlib.utils import drop_nan_by_y_index
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
backtest as normal_backtest,
risk_analysis,
)
from qlib.contrib.workflow.record_temp import SignalMseRecord
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
from qlib.config import C
from qlib.utils import init_instance_by_config, flatten_dict
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord
from qlib.tests.data import GetData
from qlib.tests import TestAutoData
@@ -166,8 +153,6 @@ def train_with_sigana():
ric = sar.load(sar.get_path("ric.pkl"))
pred_score = sar.load("pred.pkl")
smr = SignalMseRecord(recorder)
smr.generate()
uri_path = R.get_uri()
return pred_score, {"ic": ic, "ric": ric}, uri_path
@@ -256,8 +241,10 @@ class TestAllFlow(TestAutoData):
def suite():
_suite = unittest.TestSuite()
_suite.addTest(TestAllFlow("test_0_train"))
_suite.addTest(TestAllFlow("test_1_backtest"))
_suite.addTest(TestAllFlow("test_0_train_with_sigana"))
_suite.addTest(TestAllFlow("test_1_train"))
_suite.addTest(TestAllFlow("test_2_backtest"))
_suite.addTest(TestAllFlow("test_3_expmanager"))
return _suite

View File

@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import unittest
from qlib.contrib.model import all_model_classes
class TestAllFlow(unittest.TestCase):
    """Smoke test: every importable contrib model can be built with its default arguments."""

    def test_0_initialize(self):
        # Entries are None when the model's optional dependency could not be imported.
        available = [model_class for model_class in all_model_classes if model_class is not None]
        for model_class in available:
            # Construction itself is the check; the instance is not needed afterwards.
            model_class()
        print("There are {:}/{:} valid models in total.".format(len(available), len(all_model_classes)))
def suite():
    """Return a test suite containing the model initialisation test."""
    return unittest.TestSuite([TestAllFlow("test_0_initialize")])
if __name__ == "__main__":
    # Run the hand-built suite with the plain text runner when executed as a script.
    unittest.TextTestRunner().run(suite())

View File

@@ -0,0 +1,111 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import sys
import shutil
import unittest
from pathlib import Path
import qlib
from qlib.config import C
from qlib.contrib.workflow import MultiSegRecord, SignalMseRecord
from qlib.utils import init_instance_by_config, flatten_dict
from qlib.workflow import R
from qlib.tests import TestAutoData
market = "csi300"
benchmark = "SH000300"

###################################
# train model
###################################

# Keyword arguments passed to the Alpha158 data handler.
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2020-08-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
}

# Keyword arguments of the LightGBM model.
_lgb_kwargs = {
    "loss": "mse",
    "colsample_bytree": 0.8879,
    "learning_rate": 0.0421,
    "subsample": 0.8789,
    "lambda_l1": 205.6999,
    "lambda_l2": 580.9768,
    "max_depth": 8,
    "num_leaves": 210,
    "num_threads": 20,
}

# Handler description; it shares the ``data_handler_config`` object defined above.
_handler = {
    "class": "Alpha158",
    "module_path": "qlib.contrib.data.handler",
    "kwargs": data_handler_config,
}

# Named (start, end) date ranges of the dataset.
_segments = {
    "train": ("2008-01-01", "2014-12-31"),
    "valid": ("2015-01-01", "2016-12-31"),
    "test": ("2017-01-01", "2020-08-01"),
}

# Full task description: one model config and one dataset config.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": _lgb_kwargs,
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": _handler,
            "segments": _segments,
        },
    },
}
def train_multiseg():
    """
    Fit the task model and evaluate it on the "valid" and "test" segments.

    The evaluation is done by ``MultiSegRecord.generate`` with saving enabled,
    inside an experiment named "workflow".

    Returns
    -------
    the value of ``R.get_uri()`` obtained while the experiment is running.
    """
    fitted_model = init_instance_by_config(task["model"])
    data = init_instance_by_config(task["dataset"])
    eval_segments = {"valid": "valid", "test": "test"}

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        fitted_model.fit(data)

        record = MultiSegRecord(fitted_model, data, R.get_recorder())
        record.generate(eval_segments, True)
        uri = R.get_uri()
    return uri
def train_mse():
    """
    Fit the task model and generate a ``SignalMseRecord`` for it.

    Everything runs inside an experiment named "workflow".

    Returns
    -------
    the value of ``R.get_uri()`` obtained while the experiment is running.
    """
    fitted_model = init_instance_by_config(task["model"])
    data = init_instance_by_config(task["dataset"])

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        fitted_model.fit(data)

        record = SignalMseRecord(R.get_recorder(), model=fitted_model, dataset=data)
        record.generate()
        uri = R.get_uri()
    return uri
class TestAllFlow(TestAutoData):
    """Run the multi-segment and MSE workflows, then delete the experiment directory each one reports."""

    @staticmethod
    def _remove_uri_dir(uri_path):
        """
        Delete the local directory named by ``uri_path``.

        A leading "file:" scheme is removed as a prefix. ``str.strip("file:")`` must not be
        used for this: it strips any of the characters ``f``, ``i``, ``l``, ``e``, ``:``
        from both ends, so a directory whose name ends with one of them would be mangled.
        """
        scheme = "file:"
        local_path = uri_path[len(scheme):] if uri_path.startswith(scheme) else uri_path
        shutil.rmtree(str(Path(local_path).resolve()))

    def test_0_multiseg(self):
        self._remove_uri_dir(train_multiseg())

    def test_1_mse(self):
        self._remove_uri_dir(train_mse())
def suite():
    """Return a suite that runs the multi-segment test followed by the MSE test."""
    cases = unittest.TestSuite()
    for case_name in ("test_0_multiseg", "test_1_mse"):
        cases.addTest(TestAllFlow(case_name))
    return cases
if __name__ == "__main__":
    # Run the hand-built suite with the plain text runner when executed as a script.
    unittest.TextTestRunner().run(suite())