1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Add ALSTM config

This commit is contained in:
Jactus
2020-11-25 19:29:30 +08:00
parent 05599d1de8
commit a99db6a1dc
10 changed files with 139 additions and 53 deletions

View File

@@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`.
- [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
- [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
- [LSTM based on pytorch](qlib/contrib/model/pytorch_lstm.py)
- [ALSTM based on pytorch](qlib/contrib/model/pytorch_alstm.py)
- [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
- [TabNet based on pytorch](qlib/contrib/model/tabnet.py)
- [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py)
- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py)
Your PRs for new Quant models are highly welcome.

View File

@@ -0,0 +1,4 @@
numpy==1.17.4
pandas==1.1.2
scikit_learn==0.23.2
torch==1.7.0

View File

@@ -0,0 +1,69 @@
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: ALSTM
module_path: qlib.contrib.model.pytorch_alstm
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.0
n_epochs: 200
lr: 1e-3
early_stop: 20
batch_size: 800
metric: IC
loss: mse
seed: 0
GPU: 0
rnn_type: GRU
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: ALPHA360_Denoise
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -74,7 +74,7 @@ if __name__ == "__main__":
"loss": "mse",
"seed": 0,
"GPU": 0,
"rnn_type": "GRU"
"rnn_type": "GRU",
},
},
"dataset": {
@@ -142,4 +142,4 @@ if __name__ == "__main__":
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)
print(analysis_df)

View File

@@ -100,7 +100,7 @@ if __name__ == "__main__":
# model = train_model(task)
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset,save_path='benchmarks/HATS/model_hat.pkl')
model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl")
pred_score = model.predict(dataset)

View File

@@ -228,7 +228,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
strategy type: TopkAmountStrategy, TopkWeightStrategy or TopkDropoutStrategy
- **exchange related arguments**
exchange: Exchange()
pass the exchange for speeding up.
subscribe_fields: list

View File

@@ -345,7 +345,6 @@ class GRUModel(nn.Module):
return self.fc_out(out[:, -1, :]).squeeze()
class ALSTMModel(nn.Module):
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
super().__init__()
@@ -360,33 +359,36 @@ class ALSTMModel(nn.Module):
try:
klass = getattr(nn, self.rnn_type.upper())
except:
raise ValueError('unknown rnn_type `%s`' % self.rnn_type)
raise ValueError("unknown rnn_type `%s`" % self.rnn_type)
self.net = nn.Sequential()
self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size))
self.net.add_module('act', nn.Tanh())
self.rnn = klass(input_size=self.hid_size,
hidden_size=self.hid_size,
num_layers=self.rnn_layer,
batch_first=True,
dropout=self.dropout)
self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1)
self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
self.net.add_module("act", nn.Tanh())
self.rnn = klass(
input_size=self.hid_size,
hidden_size=self.hid_size,
num_layers=self.rnn_layer,
batch_first=True,
dropout=self.dropout,
)
self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
# self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1)
self.att_net = nn.Sequential()
self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2)))
self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout))
self.att_net.add_module('att_act', nn.Tanh())
self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False))
self.att_net.add_module('att_softmax', nn.Softmax(dim=1))
self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)))
self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
self.att_net.add_module("att_act", nn.Tanh())
self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False))
self.att_net.add_module("att_softmax", nn.Softmax(dim=1))
def forward(self, inputs):
# inputs: [batch_size, input_size*input_day]
inputs = inputs.view(len(inputs), self.input_size, -1)
inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
out_att = torch.mul(rnn_out, attention_score)
out_att = torch.sum(out_att, dim=1)
out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1)) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
out = self.fc_out(
torch.cat((rnn_out[:, -1, :], out_att), dim=1)
) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
# out = self.fc_out(rnn_out[:, -1, :] + out_att)
return out[..., 0]

View File

@@ -265,12 +265,14 @@ class GAT(Model):
self.logger.info("Loading pretrained model...")
if self.base_model == "LSTM":
from ...contrib.model.pytorch_lstm import LSTMModel
pretrained_model = LSTMModel()
pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
elif self.base_model == "GRU":
from ...contrib.model.pytorch_gru import GRUModel
pretrained_model = GRUModel()
pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
model_dict = self.GAT_model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
model_dict.update(pretrained_dict)

View File

@@ -78,7 +78,7 @@ class HATS(Model):
self.optimizer = optimizer.lower()
self.loss = loss
self.base_model = base_model
self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
self.visible_GPU = GPU
self.use_gpu = torch.cuda.is_available()
self.seed = seed
@@ -97,7 +97,7 @@ class HATS(Model):
"\noptimizer : {}"
"\nloss_type : {}"
"\nbase_model : {}"
"\nwith_pretrain : {}" ##### debug
"\nwith_pretrain : {}" ##### debug
"\nvisible_GPU : {}"
"\nuse_GPU : {}"
"\nseed : {}".format(
@@ -113,7 +113,7 @@ class HATS(Model):
optimizer.lower(),
loss,
base_model,
with_pretrain, ### debug
with_pretrain, ### debug
GPU,
self.use_gpu,
seed,
@@ -265,12 +265,14 @@ class HATS(Model):
self.logger.info("loading pretrained model...")
if self.base_model == "LSTM":
from ...contrib.model.pytorch_lstm import LSTMModel
pretrained_model = LSTMModel()
pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
elif self.base_model == "GRU":
from ...contrib.model.pytorch_gru import GRUModel
pretrained_model = GRUModel()
pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
model_dict = self.HATS_model.state_dict()
# filter unnecessary parameters
@@ -281,7 +283,6 @@ class HATS(Model):
self.HATS_model.load_state_dict(model_dict)
self.logger.info("loading pretrained model Done...")
# train
self.logger.info("training...")
self._fitted = True
@@ -382,22 +383,24 @@ class HATSModel(nn.Module):
self.softmax = nn.Softmax(dim=1)
self.d_feat = d_feat
num_head_att = [1]*num_layers
hidden_dim = [hidden_size]*num_layers
dims = [d_feat] + [d*nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
num_head_att = [1] * num_layers
hidden_dim = [hidden_size] * num_layers
dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
in_dims = dims[:-1]
out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)]
self.attn = nn.ModuleList([GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims,num_head_att)])
self.attn = nn.ModuleList(
[GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)]
)
self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]])
self.dropout = nn.Dropout(dropout)
self.elu = nn.ELU()
def forward(self, x):
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
out,_ = self.model(x)
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
x = x.permute(0, 2, 1) # [N, T, F]
out, _ = self.model(x)
hidden = out[:, -1, :]
hidden = self.bn1(hidden)
hidden = self.bn1(hidden)
attention = GraphAttention.cal_attention(hidden, hidden)
output = attention.mm(hidden)
output = self.fc(output)
@@ -406,9 +409,7 @@ class HATSModel(nn.Module):
return self.fc_out(output).squeeze()
class GraphAttention(nn.Module):
def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):
super().__init__()
@@ -431,7 +432,7 @@ class GraphAttention(nn.Module):
self.num_heads = num_heads
self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
self.a = nn.ModuleList([nn.Linear(2*output_dim, 1) for _ in range(num_heads)])
self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])
self.dropout = nn.Dropout(dropout)
self.softmax = nn.Softmax(dim=0)
@@ -465,7 +466,6 @@ class GraphAttention(nn.Module):
sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
mapped_nodes = [mapping[v] for v in nodes]
indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t()
out = []
for k in range(self.num_heads):
@@ -477,7 +477,7 @@ class GraphAttention(nn.Module):
e = self.leakyrelu(self.a[k](cat_h))
alpha = [self.softmax(e[lo : hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
alpha = torch.cat(tuple(alpha), dim=0)
alpha = alpha.squeeze(1)
alpha = self.dropout(alpha)
@@ -487,11 +487,18 @@ class GraphAttention(nn.Module):
return out
def cal_attention(x, y):
att_x = torch.mean(x, dim = 1).reshape(-1, 1)
att_y = torch.mean(y, dim = 1).reshape(-1, 1)
def cal_attention(x, y):
att_x = torch.mean(x, dim=1).reshape(-1, 1)
att_y = torch.mean(y, dim=1).reshape(-1, 1)
att = att_x.mm(torch.t(att_y))
x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
return torch.mean(x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)*y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), dim = 2)-att
return (
torch.mean(
x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
* y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1),
dim=2,
)
- att
)

View File

@@ -18,7 +18,7 @@ class Dataset(Serializable):
- setup data
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
- initialize the state of the dataset(info to prepare the data)
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
@@ -99,7 +99,7 @@ class DatasetH(Dataset):
Here are some examples:
.. code-block::
1) 'segments': {
'train': ("2008-01-01", "2014-12-31"),
'valid': ("2017-01-01", "2020-08-01",),