1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Update all baseline models.

This commit is contained in:
lwwang1995
2020-11-27 22:30:05 +08:00
parent 7952d79932
commit bebce24a7c
17 changed files with 282 additions and 856 deletions

View File

@@ -8,6 +8,20 @@ data_handler_config: &data_handler_config
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
@@ -26,8 +40,8 @@ port_analysis_config: &port_analysis_config
min_cost: 5
task:
model:
class: GAT
module_path: qlib.contrib.model.pytorch_gats
class: GAT_Classic
module_path: qlib.contrib.model.pytorch_gats_classic
kwargs:
d_feat: 6
hidden_size: 64
@@ -38,8 +52,7 @@ task:
early_stop: 20
metric: loss
loss: mse
base_model: LSTM
with_pretrain: True
base_model: GRU
seed: 0
GPU: 0
dataset:
@@ -47,7 +60,7 @@ task:
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
class: ALPHA360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
@@ -58,11 +71,6 @@ task:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -1,15 +0,0 @@
## Requirement
* pandas==1.1.2
* numpy==1.17.4
* scikit_learn==0.23.2
* torch==1.7.0
## HATS
* HATS is a hierarchical attention network for stock prediction that uses relational data. HATS selectively aggregates information
on different relation types and adds the information to the representations of each company. HATS is used as a relational modeling module with initialized node representations. Furthermore, HATS
can predict not only individual stock prices but also market index movements, which is similar to the graph classification task.
* HATS uses pretrained GRU and LSTM models. The GRU and LSTM code used in Qlib is a PyTorch implementation.
* Paper: HATS: A Hierarchical Graph Attention Network for Stock Movement Prediction. https://arxiv.org/pdf/1908.07999.pdf

View File

@@ -1,4 +0,0 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -1,77 +0,0 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: HATS
module_path: qlib.contrib.model.pytorch_hats
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.6
n_epochs: 200
lr: 1e-3
early_stop: 20
metric: loss
loss: mse
base_model: GRU
seed: 0
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,4 +0,0 @@
# TabNet
* TabNet is a novel high-performance and interpretable canonical deep tabular data learning architecture. TabNet uses sequential attention to choose which features to reason from at each decision step, enabling interpretability and more efficient learning as the learning capacity is used for the most salient features.
* The code used in Qlib is a PyTorch implementation of TabNet (Arik, S. O., & Pfister, T., 2019): [https://github.com/dreamquark-ai/tabnet](https://github.com/dreamquark-ai/tabnet)
* Paper: TabNet: Attentive Interpretable Tabular Learning. [https://arxiv.org/pdf/1908.07442.pdf](https://arxiv.org/pdf/1908.07442.pdf).

View File

@@ -1,5 +0,0 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0
pytorch-tabnet==2.0.1

View File

@@ -1,66 +0,0 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TabNetModel
module_path: qlib.contrib.model.tabnet
kwargs:
n_d: 8
n_a: 8
n_steps: 3
gamma: 1.3
n_independent: 2
n_shared: 2
seed: 0
momentum: 0.02
lambda_sparse: 1e-3
optimizer_params: {lr: 2e-3}
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -41,14 +41,18 @@ class CatBoostModel(Model):
**kwargs
):
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
# CatBoost needs 1D array as its label
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(
y_valid.values
)
else:
raise ValueError("CatBoost doesn't support multi-label training")

View File

@@ -11,7 +11,12 @@ import pandas as pd
import copy
from sklearn.metrics import roc_auc_score, mean_squared_error
import logging
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
create_save_path,
drop_nan_by_y_index,
)
from ...log import get_module_logger, TimeInspector
import torch
@@ -109,14 +114,19 @@ class ALSTM(Model):
)
self.ALSTM_model = ALSTMModel(
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
d_feat=self.d_feat,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
dropout=self.dropout,
)
if optimizer.lower() == "adam":
self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr)
elif optimizer.lower() == "gd":
self.train_optimizer = optim.SGD(self.ALSTM_model.parameters(), lr=self.lr)
else:
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
raise NotImplementedError(
"optimizer {} is not supported!".format(optimizer)
)
self._fitted = False
if self.use_gpu:
@@ -141,7 +151,7 @@ class ALSTM(Model):
mask = torch.isfinite(label)
if self.metric == "" or self.metric == "loss": # use loss
if self.metric == "" or self.metric == "loss":
return -self.loss_fn(pred[mask], label[mask])
raise ValueError("unknown metric `%s`" % self.metric)
@@ -161,8 +171,12 @@ class ALSTM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float()
label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_train_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(
y_train_values[indices[i : i + self.batch_size]]
).float()
if self.use_gpu:
feature = feature.cuda()
@@ -194,7 +208,9 @@ class ALSTM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float()
if self.use_gpu:
@@ -219,7 +235,9 @@ class ALSTM(Model):
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid", "test"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]
@@ -302,7 +320,9 @@ class ALSTM(Model):
class ALSTMModel(nn.Module):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
def __init__(
self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"
):
super().__init__()
self.hid_size = hidden_size
self.input_size = d_feat
@@ -317,7 +337,9 @@ class ALSTMModel(nn.Module):
except:
raise ValueError("unknown rnn_type `%s`" % self.rnn_type)
self.net = nn.Sequential()
self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
self.net.add_module(
"fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size)
)
self.net.add_module("act", nn.Tanh())
self.rnn = klass(
input_size=self.hid_size,
@@ -328,17 +350,27 @@ class ALSTMModel(nn.Module):
)
self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
self.att_net = nn.Sequential()
self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)))
self.att_net.add_module(
"att_fc_in",
nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)),
)
self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
self.att_net.add_module("att_act", nn.Tanh())
self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False))
self.att_net.add_module(
"att_fc_out",
nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False),
)
self.att_net.add_module("att_softmax", nn.Softmax(dim=1))
def forward(self, inputs):
# inputs: [batch_size, input_size*input_day]
inputs = inputs.view(len(inputs), self.input_size, -1)
inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
inputs = inputs.permute(
0, 2, 1
) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
rnn_out, _ = self.rnn(
self.net(inputs)
) # [batch, seq_len, num_directions * hidden_size]
attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
out_att = torch.mul(rnn_out, attention_score)
out_att = torch.sum(out_att, dim=1)

100
qlib/contrib/model/pytorch_gats.py Executable file → Normal file
View File

@@ -19,10 +19,12 @@ import torch.optim as optim
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
from ...contrib.model.pytorch_lstm import LSTMModel
from ...contrib.model.pytorch_gru import GRUModel
class GAT(Model):
"""GAT Model
class GATs(Model):
"""GATs Model
Parameters
----------
@@ -57,8 +59,8 @@ class GAT(Model):
**kwargs
):
# Set logger.
self.logger = get_module_logger("GAT")
self.logger.info("GAT pytorch version...")
self.logger = get_module_logger("GATs")
self.logger.info("GATs pytorch version...")
# set hyper-parameters.
self.d_feat = d_feat
@@ -78,7 +80,7 @@ class GAT(Model):
self.seed = seed
self.logger.info(
"GAT parameters setting:"
"GATs parameters setting:"
"\nd_feat : {}"
"\nhidden_size : {}"
"\nnum_layers : {}"
@@ -124,7 +126,9 @@ class GAT(Model):
elif optimizer.lower() == "gd":
self.train_optimizer = optim.SGD(self.GAT_model.parameters(), lr=self.lr)
else:
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
raise NotImplementedError(
"optimizer {} is not supported!".format(optimizer)
)
self._fitted = False
if self.use_gpu:
@@ -149,18 +153,18 @@ class GAT(Model):
mask = torch.isfinite(label)
if self.metric == "" or self.metric == "loss": # use loss
if self.metric == "" or self.metric == "loss":
return -self.loss_fn(pred[mask], label[mask])
raise ValueError("unknown metric `%s`" % self.metric)
def get_daily_inter(self, df, shuffle=False):
# organize the train data into daily inter as daily batches
# organize the train data into daily batches
daily_count = df.groupby(level=0).size().values
daily_index = np.roll(np.cumsum(daily_count), 1)
daily_index[0] = 0
if shuffle:
# shuffle the daily inter data
# shuffle data
daily_shuffle = list(zip(daily_index, daily_count))
np.random.shuffle(daily_shuffle)
daily_index, daily_count = zip(*daily_shuffle)
@@ -172,7 +176,7 @@ class GAT(Model):
y_train_values = np.squeeze(y_train.values)
self.GAT_model.train()
# organize the train data into daily inter as daily batches
# organize the train data into daily batches
daily_index, daily_count = self.get_daily_inter(x_train, shuffle=True)
for idx, count in zip(daily_index, daily_count):
@@ -203,7 +207,7 @@ class GAT(Model):
scores = []
losses = []
# organize the test data into daily inter as daily batches
# organize the test data into daily batches
daily_index, daily_count = self.get_daily_inter(data_x, shuffle=False)
for idx, count in zip(daily_index, daily_count):
@@ -233,7 +237,9 @@ class GAT(Model):
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid", "test"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]
@@ -251,17 +257,23 @@ class GAT(Model):
if self.with_pretrain:
self.logger.info("Loading pretrained model...")
if self.base_model == "LSTM":
from ...contrib.model.pytorch_lstm import LSTMModel
pretrained_model = LSTMModel()
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
elif self.base_model == "GRU":
from ...contrib.model.pytorch_gru import GRUModel
pretrained_model.load_state_dict(
torch.load("benchmarks/LSTM/model_lstm_csi300.pkl")
)
elif self.base_model == "GRU":
pretrained_model = GRUModel()
pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
pretrained_model.load_state_dict(
torch.load("benchmarks/GRU/model_gru_csi300.pkl")
)
model_dict = self.GAT_model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
pretrained_dict = {
k: v
for k, v in pretrained_model.state_dict().items()
if k in model_dict
}
model_dict.update(pretrained_dict)
self.GAT_model.load_state_dict(model_dict)
self.logger.info("Loading pretrained model Done...")
@@ -269,7 +281,6 @@ class GAT(Model):
# train
self.logger.info("training...")
self._fitted = True
# return
for step in range(self.n_epochs):
self.logger.info("Epoch%d:", step)
@@ -310,7 +321,7 @@ class GAT(Model):
x_values = x_test.values
preds = []
# organize the data into daily inter as daily batches
# organize the data into daily batches
daily_index, daily_count = self.get_daily_inter(x_test, shuffle=False)
for idx, count in zip(daily_index, daily_count):
@@ -332,7 +343,9 @@ class GAT(Model):
class GATModel(nn.Module):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
def __init__(
self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"
):
super().__init__()
if base_model == "GRU":
@@ -355,22 +368,29 @@ class GATModel(nn.Module):
raise ValueError("unknown base model name `%s`" % base_model)
self.hidden_size = hidden_size
self.bn1 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
self.fc = nn.Linear(hidden_size, hidden_size)
self.bn2 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
self.d_feat = d_feat
self.transformation = nn.Linear(self.hidden_size, self.hidden_size)
self.a = nn.Parameter(torch.randn(self.hidden_size * 2, 1))
self.a.requires_grad = True
self.fc = nn.Linear(self.hidden_size, self.hidden_size)
self.fc_out = nn.Linear(hidden_size, 1)
self.leaky_relu = nn.LeakyReLU()
self.softmax = nn.Softmax(dim=1)
self.d_feat = d_feat
def cal_convariance(self, x, y): # the 2nd dimension of x and y are the same
e_x = torch.mean(x, dim=1).reshape(-1, 1)
e_y = torch.mean(y, dim=1).reshape(-1, 1)
e_x_e_y = e_x.mm(torch.t(e_y))
x_extend = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
y_extend = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
e_xy = torch.mean(x_extend * y_extend, dim=2)
return e_xy - e_x_e_y
def cal_attention(self, x, y):
x = self.transformation(x)
y = self.transformation(y)
sample_num = x.shape[0]
dim = x.shape[1]
e_x = x.expand(sample_num, sample_num, dim)
e_y = torch.transpose(e_x, 0, 1)
attention_in = torch.cat((e_x, e_y), 2).view(-1, dim * 2)
self.a_t = torch.t(self.a)
attention_out = self.a_t.mm(torch.t(attention_in)).view(sample_num, sample_num)
attention_out = self.leaky_relu(attention_out)
att_weight = self.softmax(attention_out)
return att_weight
def forward(self, x):
# x: [N, F*T]
@@ -378,10 +398,8 @@ class GATModel(nn.Module):
x = x.permute(0, 2, 1) # [N, T, F]
out, _ = self.rnn(x)
hidden = out[:, -1, :]
hidden = self.bn1(hidden)
gamma = self.cal_convariance(hidden, hidden)
output = gamma.mm(hidden)
output = self.fc(output)
output = self.bn2(output)
output = self.leaky_relu(output)
return self.fc_out(output).squeeze()
att_weight = self.cal_attention(hidden, hidden)
hidden = att_weight.mm(hidden) + hidden
hidden = self.fc(hidden)
hidden = self.leaky_relu(hidden)
return self.fc_out(hidden).squeeze()

View File

@@ -11,7 +11,12 @@ import pandas as pd
import copy
from sklearn.metrics import roc_auc_score, mean_squared_error
import logging
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
create_save_path,
drop_nan_by_y_index,
)
from ...log import get_module_logger, TimeInspector
import torch
@@ -109,14 +114,19 @@ class GRU(Model):
)
self.gru_model = GRUModel(
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
d_feat=self.d_feat,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
dropout=self.dropout,
)
if optimizer.lower() == "adam":
self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr)
elif optimizer.lower() == "gd":
self.train_optimizer = optim.SGD(self.gru_model.parameters(), lr=self.lr)
else:
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
raise NotImplementedError(
"optimizer {} is not supported!".format(optimizer)
)
self._fitted = False
if self.use_gpu:
@@ -141,7 +151,7 @@ class GRU(Model):
mask = torch.isfinite(label)
if self.metric == "" or self.metric == "loss": # use loss
if self.metric == "" or self.metric == "loss":
return -self.loss_fn(pred[mask], label[mask])
raise ValueError("unknown metric `%s`" % self.metric)
@@ -161,8 +171,12 @@ class GRU(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float()
label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_train_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(
y_train_values[indices[i : i + self.batch_size]]
).float()
if self.use_gpu:
feature = feature.cuda()
@@ -194,7 +208,9 @@ class GRU(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float()
if self.use_gpu:
@@ -219,7 +235,9 @@ class GRU(Model):
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid", "test"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]

View File

@@ -1,491 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import pandas as pd
import copy
from ...utils import create_save_path
from ...log import get_module_logger
import torch
import torch.nn as nn
import torch.optim as optim
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
class HATS(Model):
    """HATS Model

    Wraps a :class:`HATSModel` network with training, early stopping and
    prediction, feeding it one batch per value of the first index level
    (see :meth:`get_daily_inter`).

    Parameters
    ----------
    d_feat : int
        input dimension for each time step
    hidden_size : int
        hidden size passed to the recurrent base model
    num_layers : int
        number of recurrent layers passed to the base model
    dropout : float
        dropout rate passed to the base model
    n_epochs : int
        maximum number of training epochs
    lr : float
        learning rate of the optimizer
    metric: str
        the evaluate metric used in early stop ("" or "loss")
    early_stop : int
        number of epochs without improvement before training stops
    loss : str
        loss name; only "mse" is supported
    base_model : str
        "GRU" or "LSTM"
    with_pretrain : bool
        whether to load pretrained base-model weights before training
    optimizer : str
        optimizer name ("adam" or "gd")
    GPU : str
        the GPU ID(s) used for training
    seed : int
        stored on the instance; NOTE(review): it is not applied to any
        random number generator inside this class.
    """

    def __init__(
        self,
        d_feat=6,
        hidden_size=64,
        num_layers=2,
        dropout=0.5,
        n_epochs=200,
        lr=0.01,
        metric="",
        early_stop=20,
        loss="mse",
        base_model="GRU",
        with_pretrain=True,
        optimizer="adam",
        GPU="0",
        seed=0,
        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("HATS")
        self.logger.info("HATS pytorch version...")

        # set hyper-parameters.
        self.d_feat = d_feat
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.n_epochs = n_epochs
        self.lr = lr
        self.metric = metric
        self.early_stop = early_stop
        self.optimizer = optimizer.lower()
        self.loss = loss
        self.base_model = base_model
        self.with_pretrain = with_pretrain
        self.visible_GPU = GPU

        # Set the visible GPU *before* the first CUDA query: the variable is
        # only honoured if it is in the environment when CUDA initialises.
        # str() is required because os.environ rejects non-string values
        # (e.g. an integer GPU id coming from a YAML config).
        if self.visible_GPU:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(self.visible_GPU)

        self.use_gpu = torch.cuda.is_available()
        self.seed = seed

        self.logger.info(
            "HATS parameters setting:"
            "\nd_feat : {}"
            "\nhidden_size : {}"
            "\nnum_layers : {}"
            "\ndropout : {}"
            "\nn_epochs : {}"
            "\nlr : {}"
            "\nmetric : {}"
            "\nearly_stop : {}"
            "\noptimizer : {}"
            "\nloss_type : {}"
            "\nbase_model : {}"
            "\nwith_pretrain : {}"
            "\nvisible_GPU : {}"
            "\nuse_GPU : {}"
            "\nseed : {}".format(
                d_feat,
                hidden_size,
                num_layers,
                dropout,
                n_epochs,
                lr,
                metric,
                early_stop,
                optimizer.lower(),
                loss,
                base_model,
                with_pretrain,
                GPU,
                self.use_gpu,
                seed,
            )
        )

        self.HATS_model = HATSModel(
            d_feat=self.d_feat,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            base_model=self.base_model,
        )
        if self.optimizer == "adam":
            self.train_optimizer = optim.Adam(self.HATS_model.parameters(), lr=self.lr)
        elif self.optimizer == "gd":
            self.train_optimizer = optim.SGD(self.HATS_model.parameters(), lr=self.lr)
        else:
            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))

        self._fitted = False
        if self.use_gpu:
            self.HATS_model.cuda()

    def mse(self, pred, label):
        """Return the mean squared error between ``pred`` and ``label``."""
        loss = (pred - label) ** 2
        return torch.mean(loss)

    def loss_fn(self, pred, label):
        """Return the configured loss, ignoring positions whose label is NaN.

        Raises
        ------
        ValueError
            if ``self.loss`` is not "mse".
        """
        mask = ~torch.isnan(label)

        if self.loss == "mse":
            return self.mse(pred[mask], label[mask])

        raise ValueError("unknown loss `%s`" % self.loss)

    def metric_fn(self, pred, label):
        """Return the early-stopping score (higher is better).

        Only the negated loss is supported, computed on finite labels.

        Raises
        ------
        ValueError
            if ``self.metric`` is neither "" nor "loss".
        """
        mask = torch.isfinite(label)

        if self.metric == "" or self.metric == "loss":  # use loss
            return -self.loss_fn(pred[mask], label[mask])

        raise ValueError("unknown metric `%s`" % self.metric)

    def get_daily_inter(self, df, shuffle=False):
        """Return start offsets and sizes of the per-group row blocks of ``df``.

        Rows are grouped by the first index level and each group is treated as
        one contiguous block, so ``df`` is assumed to be sorted by that level
        (presumably the date — verify against the dataset).

        Returns
        -------
        (daily_index, daily_count)
            start offset and number of rows of every block; the pairs are
            shuffled together when ``shuffle`` is True.
        """
        daily_count = df.groupby(level=0).size().values
        # cumsum gives the *end* offsets; rolling by one and zeroing the first
        # entry turns them into *start* offsets.
        daily_index = np.roll(np.cumsum(daily_count), 1)
        daily_index[0] = 0
        if shuffle:
            # shuffle the blocks while keeping each (offset, size) pair intact
            daily_shuffle = list(zip(daily_index, daily_count))
            np.random.shuffle(daily_shuffle)
            daily_index, daily_count = zip(*daily_shuffle)
        return daily_index, daily_count

    def train_epoch(self, x_train, y_train):
        """Run one optimisation pass over the training data, one block per step."""
        x_train_values = x_train.values
        y_train_values = np.squeeze(y_train.values)

        self.HATS_model.train()

        # organize the train data into daily batches
        daily_index, daily_count = self.get_daily_inter(x_train, shuffle=True)

        for idx, count in zip(daily_index, daily_count):
            batch = slice(idx, idx + count)
            feature = torch.from_numpy(x_train_values[batch]).float()
            label = torch.from_numpy(y_train_values[batch]).float()

            if self.use_gpu:
                feature = feature.cuda()
                label = label.cuda()

            pred = self.HATS_model(feature)
            loss = self.loss_fn(pred, label)

            self.train_optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(self.HATS_model.parameters(), 3.0)
            self.train_optimizer.step()

    def test_epoch(self, data_x, data_y):
        """Evaluate the model and return ``(mean loss, mean score)`` over the blocks."""
        # prepare testing data
        x_values = data_x.values
        y_values = np.squeeze(data_y.values)

        self.HATS_model.eval()

        scores = []
        losses = []

        # organize the test data into daily batches
        daily_index, daily_count = self.get_daily_inter(data_x, shuffle=False)

        # Evaluation never back-propagates, so skip building autograd graphs.
        with torch.no_grad():
            for idx, count in zip(daily_index, daily_count):
                batch = slice(idx, idx + count)
                feature = torch.from_numpy(x_values[batch]).float()
                label = torch.from_numpy(y_values[batch]).float()

                if self.use_gpu:
                    feature = feature.cuda()
                    label = label.cuda()

                pred = self.HATS_model(feature)
                loss = self.loss_fn(pred, label)
                losses.append(loss.item())

                score = self.metric_fn(pred, label)
                scores.append(score.item())

        return np.mean(losses), np.mean(scores)

    def _load_pretrained_base(self):
        """Copy weights from a pretrained LSTM/GRU checkpoint into the model.

        Only entries whose key also exists in ``HATS_model``'s state dict are
        copied. NOTE(review): confirm the checkpoint's parameter names line up
        with HATSModel's, otherwise little or nothing is transferred.
        """
        if self.base_model == "LSTM":
            from ...contrib.model.pytorch_lstm import LSTMModel

            pretrained_model = LSTMModel()
            checkpoint = "benchmarks/LSTM/model_lstm_csi300.pkl"
        elif self.base_model == "GRU":
            from ...contrib.model.pytorch_gru import GRUModel

            pretrained_model = GRUModel()
            checkpoint = "benchmarks/GRU/model_gru_csi300.pkl"
        else:
            raise ValueError("unknown base model name `%s`" % self.base_model)

        # map_location="cpu" lets a checkpoint saved on a GPU be read on any
        # machine; load_state_dict copies the values to the model's device.
        # NOTE: torch.load unpickles the file — only load trusted checkpoints.
        pretrained_model.load_state_dict(torch.load(checkpoint, map_location="cpu"))

        model_dict = self.HATS_model.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
        model_dict.update(pretrained_dict)
        self.HATS_model.load_state_dict(model_dict)

    def fit(
        self,
        dataset: DatasetH,
        evals_result=None,
        verbose=True,
        save_path=None,
    ):
        """Train with early stopping on the validation score and save the best weights.

        Parameters
        ----------
        dataset : DatasetH
            must provide "train", "valid" and "test" segments.
        evals_result : dict, optional
            filled in place with the per-epoch "train" and "valid" scores;
            a fresh dict is used when omitted.
        verbose : bool
            accepted for interface compatibility; not used here.
        save_path : str, optional
            where the best state dict is saved; obtained from
            ``create_save_path`` when omitted.
        """
        # A ``dict()`` default would be shared between calls; create it here.
        if evals_result is None:
            evals_result = {}

        df_train, df_valid, _df_test = dataset.prepare(
            ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
        )

        x_train, y_train = df_train["feature"], df_train["label"]
        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        if save_path is None:
            save_path = create_save_path(save_path)

        stop_steps = 0
        best_score = -np.inf
        best_epoch = 0
        evals_result["train"] = []
        evals_result["valid"] = []

        # load pretrained base_model
        if self.with_pretrain:
            self.logger.info("Loading pretrained model...")
            self._load_pretrained_base()
            self.logger.info("Loading pretrained model Done...")

        # Start from the current weights so that ``best_param`` is always
        # defined, even if no epoch runs or no validation score ever beats
        # -inf (e.g. all-NaN scores).
        best_param = copy.deepcopy(self.HATS_model.state_dict())

        # train
        self.logger.info("training...")
        self._fitted = True

        for step in range(self.n_epochs):
            self.logger.info("Epoch%d:", step)
            self.logger.info("training...")
            self.train_epoch(x_train, y_train)
            self.logger.info("evaluating...")
            train_loss, train_score = self.test_epoch(x_train, y_train)
            val_loss, val_score = self.test_epoch(x_valid, y_valid)
            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
            evals_result["train"].append(train_score)
            evals_result["valid"].append(val_score)

            if val_score > best_score:
                best_score = val_score
                stop_steps = 0
                best_epoch = step
                best_param = copy.deepcopy(self.HATS_model.state_dict())
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        self.HATS_model.load_state_dict(best_param)
        torch.save(best_param, save_path)

        if self.use_gpu:
            torch.cuda.empty_cache()

    def predict(self, dataset):
        """Return predictions for the "test" segment as a Series aligned to its index.

        Raises
        ------
        ValueError
            if the model has not been fitted.
        """
        if not self._fitted:
            raise ValueError("model is not fitted yet!")

        x_test = dataset.prepare("test", col_set="feature")
        index = x_test.index
        self.HATS_model.eval()
        x_values = x_test.values
        preds = []

        # organize the data into daily batches
        daily_index, daily_count = self.get_daily_inter(x_test, shuffle=False)

        for idx, count in zip(daily_index, daily_count):
            batch = slice(idx, idx + count)
            x_batch = torch.from_numpy(x_values[batch]).float()

            if self.use_gpu:
                x_batch = x_batch.cuda()

            with torch.no_grad():
                # .cpu() is a no-op for tensors that already live on the CPU.
                pred = self.HATS_model(x_batch).detach().cpu().numpy()

            preds.append(pred)

        return pd.Series(np.concatenate(preds), index=index)
class HATSModel(nn.Module):
    """Recurrent encoder followed by a covariance-weighted aggregation head.

    The last hidden state of a GRU/LSTM is batch-normalised, mixed across the
    batch with the matrix returned by ``GraphAttention.cal_attention`` and
    mapped to one score per sample.
    """

    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
        super().__init__()

        # Pick the recurrent class first; both variants share their arguments.
        if base_model == "GRU":
            rnn_cls = nn.GRU
        elif base_model == "LSTM":
            rnn_cls = nn.LSTM
        else:
            raise ValueError("unknown base model name `%s`" % base_model)

        self.d_feat = d_feat
        self.hidden_size = hidden_size

        # Submodules are created in a fixed order so that parameter
        # initialisation and state-dict layout stay the same.
        self.model = rnn_cls(
            input_size=d_feat,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.bn1 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(num_features=hidden_size, track_running_stats=False)
        self.fc_out = nn.Linear(hidden_size, 1)
        self.leaky_relu = nn.LeakyReLU()
        self.softmax = nn.Softmax(dim=1)

        # Layer widths for the stack of GraphAttention layers (one head each).
        heads = [1] * num_layers
        widths = [hidden_size] * num_layers
        dims = [d_feat]
        dims.extend(width * n_head for width, n_head in zip(widths, heads[:-1]))
        dims.append(heads[-1])
        layer_inputs = dims[:-1]
        layer_outputs = [dim // n_head for dim, n_head in zip(dims[1:], heads)]

        # NOTE: the modules below are registered but not used by forward().
        attention_layers = []
        for in_dim, out_dim, n_head in zip(layer_inputs, layer_outputs, heads):
            attention_layers.append(GraphAttention(in_dim, out_dim, n_head, dropout))
        self.attn = nn.ModuleList(attention_layers)
        self.bns = nn.ModuleList([nn.BatchNorm1d(width) for width in dims[1:-1]])
        self.dropout = nn.Dropout(dropout)
        self.elu = nn.ELU()

    def forward(self, x):
        """Score every row of ``x``, reshaped to ``[N, d_feat, T]`` first."""
        batch = x.reshape(x.shape[0], self.d_feat, -1)  # [N, F, T]
        batch = batch.permute(0, 2, 1)  # [N, T, F]
        rnn_out, _ = self.model(batch)
        last_state = self.bn1(rnn_out[:, -1, :])
        weights = GraphAttention.cal_attention(last_state, last_state)
        mixed = weights.mm(last_state)
        mixed = self.leaky_relu(self.bn2(self.fc(mixed)))
        return self.fc_out(mixed).squeeze()
class GraphAttention(nn.Module):
    """Multi-head graph attention layer.

    Parameters
    ----------
    input_dim : int
        Dimension of input node features.
    output_dim : int
        Dimension of output node features (per head).
    num_heads : int
        Number of attention heads in this layer.
    dropout : float
        Dropout rate applied to the attention coefficients. Default: 0.5.
    """

    def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):
        super().__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_heads = num_heads

        # One projection and one scoring layer per head.
        self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
        self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])

        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.Softmax(dim=0)
        self.leakyrelu = nn.LeakyReLU()

    def forward(self, features, nodes, mappings, rows):
        """Aggregate neighbour features with attention, once per head.

        Parameters
        ----------
        features : torch.Tensor
            An (n' x input_dim) tensor of input node features.
        nodes : sequence
            Node ids for which outputs are produced; ``nodes[i]`` is the node
            whose neighbours are listed in ``rows[i]``.
        mappings : dict
            Maps a node id to its row position in ``features``. For example,
            if the nodes are [2, 5], then mappings[2] = 0 and mappings[5] = 1.
        rows : sequence of sequences
            rows[i] holds the node ids of the neighbours of nodes[i].

        Returns
        -------
        out : list of torch.Tensor
            One (len(nodes) x output_dim) tensor per attention head.
        """
        nprime = features.shape[0]
        rows = [np.array([mappings[v] for v in row], dtype=np.int64) for row in rows]
        # sum_degs[i]:sum_degs[i+1] is the slice of edges that belong to node i.
        sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
        mapped_nodes = [mappings[v] for v in nodes]
        # (target, neighbour) pairs; built on the features' device so the
        # sparse adjacency matrix can be multiplied with ``h`` directly.
        indices = torch.tensor(
            [[v, int(c)] for (v, row) in zip(mapped_nodes, rows) for c in row],
            dtype=torch.long,
            device=features.device,
        ).t()

        out = []
        for k in range(self.num_heads):
            h = self.fcs[k](features)

            nbr_h = torch.cat(tuple([h[row] for row in rows]), dim=0)
            self_h = torch.cat(
                tuple([h[mapped_nodes[i]].repeat(len(row), 1) for (i, row) in enumerate(rows)]), dim=0
            )
            cat_h = torch.cat((self_h, nbr_h), dim=1)

            e = self.leakyrelu(self.a[k](cat_h))

            # Normalise the scores separately within each node's neighbourhood.
            alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
            alpha = torch.cat(tuple(alpha), dim=0)
            alpha = alpha.squeeze(1)
            alpha = self.dropout(alpha)

            # torch.sparse_coo_tensor replaces the deprecated
            # torch.sparse.FloatTensor constructor.
            adj = torch.sparse_coo_tensor(indices, alpha, (nprime, nprime))
            out.append(torch.sparse.mm(adj, h)[mapped_nodes])

        return out

    @staticmethod
    def cal_attention(x, y):
        """Return the row-wise covariance matrix between ``x`` and ``y``.

        Entry (i, j) is ``mean(x[i] * y[j]) - mean(x[i]) * mean(y[j])``, the
        means being taken over the feature dimension, which must have the
        same size in ``x`` and ``y``.

        Parameters
        ----------
        x : torch.Tensor
            An (n x d) tensor.
        y : torch.Tensor
            An (m x d) tensor.

        Returns
        -------
        torch.Tensor
            An (n x m) tensor.
        """
        mean_x = torch.mean(x, dim=1).reshape(-1, 1)
        mean_y = torch.mean(y, dim=1).reshape(-1, 1)
        # E[x_i * y_j] is one matrix product; this avoids materialising two
        # (n x m x d) intermediate tensors.
        mean_xy = x.mm(torch.t(y)) / x.shape[1]
        return mean_xy - mean_x.mm(torch.t(mean_y))

View File

@@ -11,7 +11,12 @@ import pandas as pd
import copy
from sklearn.metrics import roc_auc_score, mean_squared_error
import logging
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
create_save_path,
drop_nan_by_y_index,
)
from ...log import get_module_logger, TimeInspector
import torch
@@ -109,14 +114,19 @@ class LSTM(Model):
)
self.lstm_model = LSTMModel(
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
d_feat=self.d_feat,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
dropout=self.dropout,
)
if optimizer.lower() == "adam":
self.train_optimizer = optim.Adam(self.lstm_model.parameters(), lr=self.lr)
elif optimizer.lower() == "gd":
self.train_optimizer = optim.SGD(self.lstm_model.parameters(), lr=self.lr)
else:
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
raise NotImplementedError(
"optimizer {} is not supported!".format(optimizer)
)
self._fitted = False
if self.use_gpu:
@@ -141,7 +151,7 @@ class LSTM(Model):
mask = torch.isfinite(label)
if self.metric == "" or self.metric == "loss": # use loss
if self.metric == "" or self.metric == "loss":
return -self.loss_fn(pred[mask], label[mask])
raise ValueError("unknown metric `%s`" % self.metric)
@@ -161,8 +171,12 @@ class LSTM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float()
label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_train_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(
y_train_values[indices[i : i + self.batch_size]]
).float()
if self.use_gpu:
feature = feature.cuda()
@@ -194,7 +208,9 @@ class LSTM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float()
feature = torch.from_numpy(
x_values[indices[i : i + self.batch_size]]
).float()
label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float()
if self.use_gpu:
@@ -219,7 +235,9 @@ class LSTM(Model):
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid", "test"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]

View File

@@ -19,7 +19,12 @@ import pandas as pd
import copy
from sklearn.metrics import roc_auc_score, mean_squared_error
import logging
from ...utils import unpack_archive_with_buffer, save_multiple_parts_file, create_save_path, drop_nan_by_y_index
from ...utils import (
unpack_archive_with_buffer,
save_multiple_parts_file,
create_save_path,
drop_nan_by_y_index,
)
from ...log import get_module_logger, TimeInspector
import torch
@@ -33,7 +38,16 @@ from ...data.dataset.handler import DataHandlerLP
class SFM_Model(nn.Module):
def __init__(self, d_feat=6, output_dim=1, freq_dim=10, hidden_size=64, dropout_W=0.0, dropout_U=0.0, device="cpu"):
def __init__(
self,
d_feat=6,
output_dim=1,
freq_dim=10,
hidden_size=64,
dropout_W=0.0,
dropout_U=0.0,
device="cpu",
):
super().__init__()
self.input_dim = d_feat
@@ -42,30 +56,52 @@ class SFM_Model(nn.Module):
self.hidden_dim = hidden_size
self.device = device
self.W_i = nn.Parameter(init.xavier_uniform_(torch.empty((self.input_dim, self.hidden_dim))))
self.U_i = nn.Parameter(init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim)))
self.W_i = nn.Parameter(
init.xavier_uniform_(torch.empty((self.input_dim, self.hidden_dim)))
)
self.U_i = nn.Parameter(
init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim))
)
self.b_i = nn.Parameter(torch.zeros(self.hidden_dim))
self.W_ste = nn.Parameter(init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim)))
self.U_ste = nn.Parameter(init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim)))
self.W_ste = nn.Parameter(
init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim))
)
self.U_ste = nn.Parameter(
init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim))
)
self.b_ste = nn.Parameter(torch.ones(self.hidden_dim))
self.W_fre = nn.Parameter(init.xavier_uniform_(torch.empty(self.input_dim, self.freq_dim)))
self.U_fre = nn.Parameter(init.orthogonal_(torch.empty(self.hidden_dim, self.freq_dim)))
self.W_fre = nn.Parameter(
init.xavier_uniform_(torch.empty(self.input_dim, self.freq_dim))
)
self.U_fre = nn.Parameter(
init.orthogonal_(torch.empty(self.hidden_dim, self.freq_dim))
)
self.b_fre = nn.Parameter(torch.ones(self.freq_dim))
self.W_c = nn.Parameter(init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim)))
self.U_c = nn.Parameter(init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim)))
self.W_c = nn.Parameter(
init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim))
)
self.U_c = nn.Parameter(
init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim))
)
self.b_c = nn.Parameter(torch.zeros(self.hidden_dim))
self.W_o = nn.Parameter(init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim)))
self.U_o = nn.Parameter(init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim)))
self.W_o = nn.Parameter(
init.xavier_uniform_(torch.empty(self.input_dim, self.hidden_dim))
)
self.U_o = nn.Parameter(
init.orthogonal_(torch.empty(self.hidden_dim, self.hidden_dim))
)
self.b_o = nn.Parameter(torch.zeros(self.hidden_dim))
self.U_a = nn.Parameter(init.orthogonal_(torch.empty(self.freq_dim, 1)))
self.b_a = nn.Parameter(torch.zeros(self.hidden_dim))
self.W_p = nn.Parameter(init.xavier_uniform_(torch.empty(self.hidden_dim, self.output_dim)))
self.W_p = nn.Parameter(
init.xavier_uniform_(torch.empty(self.hidden_dim, self.output_dim))
)
self.b_p = nn.Parameter(torch.zeros(self.output_dim))
self.activation = nn.Tanh()
@@ -101,8 +137,12 @@ class SFM_Model(nn.Module):
x_o = torch.matmul(x * B_W[0], self.W_o) + self.b_o
i = self.inner_activation(x_i + torch.matmul(h_tm1 * B_U[0], self.U_i))
ste = self.inner_activation(x_ste + torch.matmul(h_tm1 * B_U[0], self.U_ste))
fre = self.inner_activation(x_fre + torch.matmul(h_tm1 * B_U[0], self.U_fre))
ste = self.inner_activation(
x_ste + torch.matmul(h_tm1 * B_U[0], self.U_ste)
)
fre = self.inner_activation(
x_fre + torch.matmul(h_tm1 * B_U[0], self.U_fre)
)
ste = torch.reshape(ste, (-1, self.hidden_dim, 1))
fre = torch.reshape(fre, (-1, 1, self.freq_dim))
@@ -157,7 +197,16 @@ class SFM_Model(nn.Module):
init_state_time = torch.tensor(0).to(self.device)
self.states = [init_state_p, init_state_h, init_state_S_re, init_state_S_im, init_state_time, None, None, None]
self.states = [
init_state_p,
init_state_h,
init_state_S_re,
init_state_S_im,
init_state_time,
None,
None,
None,
]
def get_constants(self, x):
constants = []
@@ -282,7 +331,9 @@ class SFM(Model):
elif optimizer.lower() == "gd":
self.train_optimizer = optim.SGD(self.sfm_model.parameters(), lr=self.lr)
else:
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
raise NotImplementedError(
"optimizer {} is not supported!".format(optimizer)
)
self._fitted = False
self.sfm_model.to(self.device)
@@ -305,8 +356,16 @@ class SFM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)
feature = (
torch.from_numpy(x_values[indices[i : i + self.batch_size]])
.float()
.to(self.device)
)
label = (
torch.from_numpy(y_values[indices[i : i + self.batch_size]])
.float()
.to(self.device)
)
pred = self.sfm_model(feature)
loss = self.loss_fn(pred, label)
@@ -332,8 +391,16 @@ class SFM(Model):
if len(indices) - i < self.batch_size:
break
feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
feature = (
torch.from_numpy(x_train_values[indices[i : i + self.batch_size]])
.float()
.to(self.device)
)
label = (
torch.from_numpy(y_train_values[indices[i : i + self.batch_size]])
.float()
.to(self.device)
)
pred = self.sfm_model(feature)
loss = self.loss_fn(pred, label)
@@ -352,7 +419,9 @@ class SFM(Model):
):
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
@@ -409,7 +478,7 @@ class SFM(Model):
mask = torch.isfinite(label)
if self.metric == "" or self.metric == "loss": # use loss
if self.metric == "" or self.metric == "loss":
return -self.loss_fn(pred[mask], label[mask])
raise ValueError("unknown metric `%s`" % self.metric)

View File

@@ -1,85 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import numpy as np
import pandas as pd
from pytorch_tabnet.tab_model import TabNetRegressor
from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
class TabNetModel(Model):
    """Wrapper that trains and queries a ``TabNetRegressor`` on a qlib dataset."""

    def __init__(
        self,
        n_d,
        n_a,
        n_steps,
        gamma,
        n_independent,
        n_shared,
        seed,
        momentum,
        lambda_sparse,
        optimizer_params,
        **kwargs
    ):
        """Store the hyperparameters later handed to ``TabNetRegressor``.

        Extra keyword arguments are accepted but not stored.
        """
        # No regressor exists until ``fit`` has been called.
        self.model = None
        self.n_d, self.n_a, self.n_steps = n_d, n_a, n_steps
        self.gamma = gamma
        self.n_independent, self.n_shared = n_independent, n_shared
        self.seed = seed
        self.momentum, self.lambda_sparse = momentum, lambda_sparse
        self.optimizer_params = optimizer_params

    def fit(
        self,
        dataset: DatasetH,
        n_d=8,
        n_a=8,
        n_steps=3,
        gamma=1.3,
        n_independent=2,
        n_shared=2,
        seed=0,
        momentum=0.02,
        lambda_sparse=1e-3,
        optimizer_params={"lr": 2e-3},
        **kwargs
    ):
        """Train a new regressor on the train segment, evaluating on valid.

        The hyperparameter arguments of this method are part of the signature
        but are not read; the regressor is configured from the values given to
        the constructor. Extra keyword arguments are forwarded to
        ``TabNetRegressor``. Labels are multiplied by 100 before training.
        """
        train_frame, valid_frame = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        x_train = train_frame["feature"].values
        y_train = train_frame["label"].values * 100
        x_valid = valid_frame["feature"].values
        y_valid = valid_frame["label"].values * 100

        hyper_params = {
            "n_d": self.n_d,
            "n_a": self.n_a,
            "n_steps": self.n_steps,
            "gamma": self.gamma,
            "n_independent": self.n_independent,
            "n_shared": self.n_shared,
            "seed": self.seed,
            "momentum": self.momentum,
            "lambda_sparse": self.lambda_sparse,
            "optimizer_params": self.optimizer_params,
        }
        self.model = TabNetRegressor(**hyper_params, **kwargs)
        self.model.fit(x_train, y_train, eval_set=[(x_valid, y_valid)])

    def predict(self, dataset):
        """Return the regressor's predictions for the test segment.

        The result is a ``pd.Series`` indexed like the test features. The
        values are the flattened regressor output; no rescaling is applied
        here.

        Raises
        ------
        ValueError
            If ``fit`` has not been called yet.
        """
        if self.model is None:
            raise ValueError("model is not fitted yet!")
        test_features = dataset.prepare("test", col_set="feature")
        flat_pred = self.model.predict(test_features.values).reshape([-1])
        return pd.Series(flat_pred, index=test_features.index)

View File

@@ -38,14 +38,18 @@ class XGBModel(Model):
):
df_train, df_valid = dataset.prepare(
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
["train", "valid"],
col_set=["feature", "label"],
data_key=DataHandlerLP.DK_L,
)
x_train, y_train = df_train["feature"], df_train["label"]
x_valid, y_valid = df_valid["feature"], df_valid["label"]
# Lightgbm need 1D array as its label
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(
y_valid.values
)
else:
raise ValueError("XGBoost doesn't support multi-label training")
@@ -68,4 +72,6 @@ class XGBModel(Model):
if self.model is None:
raise ValueError("model is not fitted yet!")
x_test = dataset.prepare("test", col_set="feature")
return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index)
return pd.Series(
self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index
)