diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index bbbb61851..6df996e11 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -23,6 +23,7 @@ import torch import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -39,8 +40,8 @@ class ALSTM(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -76,7 +77,7 @@ class ALSTM(Model): self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -123,6 +124,9 @@ class ALSTM(Model): num_layers=self.num_layers, dropout=self.dropout, ) + self.logger.info("model:\n{:}".format(self.ALSTM_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.ALSTM_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de8..c8854e8d3 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -40,8 +40,8 @@ class ALSTM(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -78,7 +78,7 @@ class ALSTM(Model): self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = 
torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9f..720e6b4f1 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -23,6 +23,7 @@ import torch import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -76,7 +77,7 @@ class GRU(Model): self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -123,6 +124,9 @@ class GRU(Model): num_layers=self.num_layers, dropout=self.dropout, ) + self.logger.info("model:\n{:}".format(self.gru_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.gru_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b85..cbf1c1add 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -24,6 +24,7 @@ import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP @@ -78,7 +79,7 @@ class GRU(Model): self.early_stop = early_stop 
self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -127,7 +128,10 @@ class GRU(Model): hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, - ).to(self.device) + ) + self.logger.info("model:\n{:}".format(self.gru_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.gru_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec..61e372425 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -76,7 +76,7 @@ class LSTM(Model): self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f..44d136676 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -78,7 +78,7 @@ class LSTM(Model): self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import numpy as np
import torch.nn as nn


def count_parameters(model_or_parameters, unit="mb"):
    """Count the scalar elements of a model's parameters, scaled to ``unit``.

    Note that the result is an element count divided by a power of ten; it is
    not a byte size, because the element dtype is not taken into account.

    Parameters
    ----------
    model_or_parameters : nn.Module or iterable
        Either a module, whose ``parameters()`` are counted, or an iterable of
        objects exposing ``size()`` (e.g. tensors / ``nn.Parameter``).
    unit : str or None
        Case-insensitive scale: ``"kb"`` divides the count by 1e3, ``"mb"``
        by 1e6 and ``"gb"`` by 1e9. ``None`` returns the raw count.

    Returns
    -------
    int or float
        The raw element count when ``unit`` is None, otherwise the scaled
        count as a float.

    Raises
    ------
    ValueError
        If ``unit`` is a string other than the supported ones.
    """
    if isinstance(model_or_parameters, nn.Module):
        parameters = model_or_parameters.parameters()
    else:
        parameters = model_or_parameters

    # Builtin sum: passing a generator to np.sum is deprecated in NumPy.
    # int(...) keeps the total an exact integer (np.prod of an empty shape is 1.0).
    counts = sum(int(np.prod(p.size())) for p in parameters)

    # None must be handled before calling .lower(), which would fail on it.
    if unit is None:
        return counts

    scales = {"kb": 1e3, "mb": 1e6, "gb": 1e9}
    key = unit.lower()
    if key not in scales:
        raise ValueError("Unknow unit: {:}".format(unit))
    return counts / scales[key]