From 05599d1de8ec7b0e48ba7b71a2905027d40f2666 Mon Sep 17 00:00:00 2001 From: Don-ustc <43958178+Don-ustc@users.noreply.github.com> Date: Wed, 25 Nov 2020 19:13:53 +0800 Subject: [PATCH] Update about ALSTM --- qlib/contrib/model/pytorch_alstm.py | 392 ++++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) create mode 100644 qlib/contrib/model/pytorch_alstm.py diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py new file mode 100644 index 000000000..b302925ec --- /dev/null +++ b/qlib/contrib/model/pytorch_alstm.py @@ -0,0 +1,392 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + + +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import pandas as pd +import copy +from sklearn.metrics import roc_auc_score, mean_squared_error +import logging +from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index +from ...log import get_module_logger, TimeInspector + +import torch +import torch.nn as nn +import torch.optim as optim + +from ...model.base import Model +from ...data.dataset import DatasetH +from ...data.dataset.handler import DataHandlerLP + + +class ALSTM(Model): + """ALSTM Model + + Parameters + ---------- + input_dim : int + input dimension + output_dim : int + output dimension + layers : tuple + layer sizes + lr : float + learning rate + optimizer : str + optimizer name + GPU : str + the GPU ID(s) used for training + """ + + def __init__( + self, + d_feat=6, + hidden_size=64, + num_layers=2, + dropout=0.0, + n_epochs=200, + lr=0.001, + metric="IC", + batch_size=2000, + early_stop=20, + loss="mse", + optimizer="adam", + GPU="0", + seed=0, + rnn_type="GRU", + **kwargs + ): + # Set logger. + self.logger = get_module_logger("ALSTM") + self.logger.info("ALSTM pytorch version...") + + # set hyper-parameters. + self.d_feat = d_feat + self.hidden_size = hidden_size + self.num_layers = num_layers + self.dropout = dropout + self.n_epochs = n_epochs + self.lr = lr + self.metric = metric + self.batch_size = batch_size + self.early_stop = early_stop + self.optimizer = optimizer.lower() + self.loss = loss + self.visible_GPU = GPU + self.use_gpu = torch.cuda.is_available() + self.seed = seed + self.rnn_type = rnn_type + + self.logger.info( + "ALSTM parameters setting:" + "\nd_feat : {}" + "\nhidden_size : {}" + "\nnum_layers : {}" + "\ndropout : {}" + "\nn_epochs : {}" + "\nlr : {}" + "\nmetric : {}" + "\nbatch_size : {}" + "\nearly_stop : {}" + "\noptimizer : {}" + "\nloss_type : {}" + "\nvisible_GPU : {}" + "\nuse_GPU : {}" + "\nseed : {}" + "\nrnn_type : {}".format( + d_feat, + hidden_size, + num_layers, + dropout, + n_epochs, + lr, + metric, + batch_size, + early_stop, + optimizer.lower(), + loss, + GPU, + self.use_gpu, + seed, + self.rnn_type, + ) + ) + + if loss not in {"mse", "binary"}: + raise NotImplementedError("loss {} is not supported!".format(loss)) + self._scorer = mean_squared_error if loss == "mse" else roc_auc_score + + self.alstm_model = ALSTMModel( + d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout + ) + # def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, input_day=20, rnn_type="GRU"): + + if optimizer.lower() == "adam": + self.train_optimizer = optim.Adam(self.alstm_model.parameters(), lr=self.lr) + elif optimizer.lower() == "gd": + self.train_optimizer = optim.SGD(self.alstm_model.parameters(), lr=self.lr) + else: + raise NotImplementedError("optimizer {} is not supported!".format(optimizer)) + + self._fitted = False + if self.use_gpu: + self.alstm_model.cuda() + # set the visible GPU + if self.visible_GPU: + os.environ["CUDA_VISIBLE_DEVICES"] = self.visible_GPU + + def mse(self, pred, label): + loss = (pred - label) ** 2 + return torch.mean(loss) + + def loss_fn(self, pred, label): + mask = ~torch.isnan(label) + + if self.loss == "mse": + return self.mse(pred[mask], label[mask]) + + raise ValueError("unknown loss `%s`" % self.loss) + + def metric_fn(self, pred, label): + + mask = torch.isfinite(label) + if self.metric == "IC": + return self.cal_ic(pred[mask], label[mask]) + + if self.metric == "" or self.metric == "loss": # use loss + return -self.loss_fn(pred[mask], label[mask]) + + raise ValueError("unknown metric `%s`" % self.metric) + + def cal_ic(self, pred, label): + return torch.mean(pred * label) + + def train_epoch(self, x_train, y_train): + + x_train_values = x_train.values + y_train_values = np.squeeze(y_train.values) * 100 + + self.alstm_model.train() + + indices = np.arange(len(x_train_values)) + np.random.shuffle(indices) + + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float() + label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float() + + if self.use_gpu: + feature = feature.cuda() + label = label.cuda() + + pred = self.alstm_model(feature) + loss = self.loss_fn(pred, label) + + self.train_optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_value_(self.alstm_model.parameters(), 3.0) + self.train_optimizer.step() + + def test_epoch(self, data_x, data_y): + + # prepare training data + x_values = data_x.values + y_values = np.squeeze(data_y.values) + + self.alstm_model.eval() + + scores = [] + losses = [] + + indices = np.arange(len(x_values)) + np.random.shuffle(indices) + + for i in range(len(indices))[:: self.batch_size]: + + if len(indices) - i < self.batch_size: + break + + feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float() + label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float() + + if self.use_gpu: + feature = feature.cuda() + label = label.cuda() + + pred = self.alstm_model(feature) + loss = self.loss_fn(pred, label) + losses.append(loss.item()) + + score = self.metric_fn(pred, label) + scores.append(score.item()) + + return np.mean(losses), np.mean(scores) + + def fit( + self, + dataset: DatasetH, + evals_result=dict(), + verbose=True, + save_path=None, + ): + + df_train, df_valid, df_test = dataset.prepare( + ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L + ) + + x_train, y_train = df_train["feature"], df_train["label"] + x_valid, y_valid = df_valid["feature"], df_valid["label"] + + if save_path == None: + save_path = create_save_path(save_path) + stop_steps = 0 + train_loss = 0 + best_score = -np.inf + best_epoch = 0 + evals_result["train"] = [] + evals_result["valid"] = [] + + # train + self.logger.info("training...") + self._fitted = True + # return + + for step in range(self.n_epochs): + self.logger.info("Epoch%d:", step) + self.logger.info("training...") + self.train_epoch(x_train, y_train) + self.logger.info("evaluating...") + train_loss, train_score = self.test_epoch(x_train, y_train) + val_loss, val_score = self.test_epoch(x_valid, y_valid) + self.logger.info("train %.6f, valid %.6f" % (train_score, val_score)) + evals_result["train"].append(train_score) + evals_result["valid"].append(val_score) + + if val_score > best_score: + best_score = val_score + stop_steps = 0 + best_epoch = step + best_param = copy.deepcopy(self.alstm_model.state_dict()) + else: + stop_steps += 1 + if stop_steps >= self.early_stop: + self.logger.info("early stop") + break + + self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch)) + self.alstm_model.load_state_dict(best_param) + torch.save(best_param, save_path) + + if self.use_gpu: + torch.cuda.empty_cache() + + def predict(self, dataset): + if not self._fitted: + raise ValueError("model is not fitted yet!") + + x_test = dataset.prepare("test", col_set="feature") + index = x_test.index + self.alstm_model.eval() + x_values = x_test.values + sample_num = x_values.shape[0] + preds = [] + + for begin in range(sample_num)[:: self.batch_size]: + + if sample_num - begin < self.batch_size: + end = sample_num + else: + end = begin + self.batch_size + + x_batch = torch.from_numpy(x_values[begin:end]).float() + + if self.use_gpu: + x_batch = x_batch.cuda() + + with torch.no_grad(): + if self.use_gpu: + pred = self.alstm_model(x_batch).detach().cpu().numpy() + else: + pred = self.alstm_model(x_batch).detach().numpy() + + preds.append(pred) + + return pd.Series(np.concatenate(preds), index=index) + + +class GRUModel(nn.Module): + def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0): + super().__init__() + + self.rnn = nn.GRU( + input_size=d_feat, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout, + ) + self.fc_out = nn.Linear(hidden_size, 1) + + self.d_feat = d_feat + + def forward(self, x): + # x: [N, F*T] + x = x.reshape(len(x), self.d_feat, -1) # [N, F, T] + x = x.permute(0, 2, 1) # [N, T, F] + out, _ = self.rnn(x) + return self.fc_out(out[:, -1, :]).squeeze() + + + +class ALSTMModel(nn.Module): + def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"): + super().__init__() + self.hid_size = hidden_size + self.input_size = d_feat + self.dropout = dropout + self.rnn_type = rnn_type + self.rnn_layer = num_layers + self._build_model() + + def _build_model(self): + try: + klass = getattr(nn, self.rnn_type.upper()) + except: + raise ValueError('unknown rnn_type `%s`' % self.rnn_type) + self.net = nn.Sequential() + self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size)) + self.net.add_module('act', nn.Tanh()) + self.rnn = klass(input_size=self.hid_size, + hidden_size=self.hid_size, + num_layers=self.rnn_layer, + batch_first=True, + dropout=self.dropout) + self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1) + # self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1) + self.att_net = nn.Sequential() + self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2))) + self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout)) + self.att_net.add_module('att_act', nn.Tanh()) + self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False)) + self.att_net.add_module('att_softmax', nn.Softmax(dim=1)) + + def forward(self, inputs): + # inputs: [batch_size, input_size*input_day] + inputs = inputs.view(len(inputs), self.input_size, -1) + inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size] + rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size] + attention_score = self.att_net(rnn_out) # [batch, seq_len, 1] + out_att = torch.mul(rnn_out, attention_score) + out_att = torch.sum(out_att, dim=1) + out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1)) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1] + # out = self.fc_out(rnn_out[:, -1, :] + out_att) + return out[..., 0] +