mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Add ALSTM config
This commit is contained in:
@@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`.
|
||||
- [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
|
||||
- [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
|
||||
- [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py)
|
||||
- [ALSTM based on pytorcn](qlib/contrib/model/pytorch_alstm.py)
|
||||
- [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
|
||||
- [TabNet based on pytorch](qlib/contrib/model/tabnet.py)
|
||||
- [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
|
||||
<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
|
||||
- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py)
|
||||
- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py)
|
||||
|
||||
Your PR of new Quant models is highly welcomed.
|
||||
|
||||
|
||||
4
examples/benchmarks/ALSTM/requirements.txt
Normal file
4
examples/benchmarks/ALSTM/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
numpy==1.17.4
|
||||
pandas==1.1.2
|
||||
scikit_learn==0.23.2
|
||||
torch==1.7.0
|
||||
69
examples/benchmarks/ALSTM/workflow_config_alstm.yaml
Normal file
69
examples/benchmarks/ALSTM/workflow_config_alstm.yaml
Normal file
@@ -0,0 +1,69 @@
|
||||
provider_uri: "~/.qlib/qlib_data/cn_data"
|
||||
region: cn
|
||||
market: &market csi300
|
||||
benchmark: &benchmark SH000300
|
||||
data_handler_config: &data_handler_config
|
||||
start_time: 2008-01-01
|
||||
end_time: 2020-08-01
|
||||
fit_start_time: 2008-01-01
|
||||
fit_end_time: 2014-12-31
|
||||
instruments: *market
|
||||
port_analysis_config: &port_analysis_config
|
||||
strategy:
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy.strategy
|
||||
kwargs:
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
verbose: False
|
||||
limit_threshold: 0.095
|
||||
account: 100000000
|
||||
benchmark: *benchmark
|
||||
deal_price: close
|
||||
open_cost: 0.0005
|
||||
close_cost: 0.0015
|
||||
min_cost: 5
|
||||
task:
|
||||
model:
|
||||
class: ALSTM
|
||||
module_path: qlib.contrib.model.pytorch_alstm
|
||||
kwargs:
|
||||
d_feat: 6
|
||||
hidden_size: 64
|
||||
num_layers: 2
|
||||
dropout: 0.0
|
||||
n_epochs: 200
|
||||
lr: 1e-3
|
||||
early_stop: 20
|
||||
batch_size: 800
|
||||
metric: IC
|
||||
loss: mse
|
||||
seed: 0
|
||||
GPU: 0
|
||||
rnn_type: GRU
|
||||
dataset:
|
||||
class: DatasetH
|
||||
module_path: qlib.data.dataset
|
||||
kwargs:
|
||||
handler:
|
||||
class: ALPHA360_Denoise
|
||||
module_path: qlib.contrib.data.handler
|
||||
kwargs: *data_handler_config
|
||||
segments:
|
||||
train: [2008-01-01, 2014-12-31]
|
||||
valid: [2015-01-01, 2016-12-31]
|
||||
test: [2017-01-01, 2020-08-01]
|
||||
record:
|
||||
- class: SignalRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs: {}
|
||||
- class: SigAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
ana_long_short: False
|
||||
ann_scaler: 252
|
||||
- class: PortAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
config: *port_analysis_config
|
||||
@@ -74,7 +74,7 @@ if __name__ == "__main__":
|
||||
"loss": "mse",
|
||||
"seed": 0,
|
||||
"GPU": 0,
|
||||
"rnn_type": "GRU"
|
||||
"rnn_type": "GRU",
|
||||
},
|
||||
},
|
||||
"dataset": {
|
||||
@@ -142,4 +142,4 @@ if __name__ == "__main__":
|
||||
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
|
||||
)
|
||||
analysis_df = pd.concat(analysis) # type: pd.DataFrame
|
||||
print(analysis_df)
|
||||
print(analysis_df)
|
||||
|
||||
@@ -100,7 +100,7 @@ if __name__ == "__main__":
|
||||
# model = train_model(task)
|
||||
model = init_instance_by_config(task["model"])
|
||||
dataset = init_instance_by_config(task["dataset"])
|
||||
model.fit(dataset,save_path='benchmarks/HATS/model_hat.pkl')
|
||||
model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl")
|
||||
|
||||
pred_score = model.predict(dataset)
|
||||
|
||||
|
||||
@@ -228,7 +228,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
|
||||
strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy
|
||||
|
||||
- **exchange related arguments**
|
||||
|
||||
|
||||
exchange: Exchange()
|
||||
pass the exchange for speeding up.
|
||||
subscribe_fields: list
|
||||
|
||||
@@ -345,7 +345,6 @@ class GRUModel(nn.Module):
|
||||
return self.fc_out(out[:, -1, :]).squeeze()
|
||||
|
||||
|
||||
|
||||
class ALSTMModel(nn.Module):
|
||||
def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
|
||||
super().__init__()
|
||||
@@ -360,33 +359,36 @@ class ALSTMModel(nn.Module):
|
||||
try:
|
||||
klass = getattr(nn, self.rnn_type.upper())
|
||||
except:
|
||||
raise ValueError('unknown rnn_type `%s`' % self.rnn_type)
|
||||
raise ValueError("unknown rnn_type `%s`" % self.rnn_type)
|
||||
self.net = nn.Sequential()
|
||||
self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size))
|
||||
self.net.add_module('act', nn.Tanh())
|
||||
self.rnn = klass(input_size=self.hid_size,
|
||||
hidden_size=self.hid_size,
|
||||
num_layers=self.rnn_layer,
|
||||
batch_first=True,
|
||||
dropout=self.dropout)
|
||||
self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1)
|
||||
self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
|
||||
self.net.add_module("act", nn.Tanh())
|
||||
self.rnn = klass(
|
||||
input_size=self.hid_size,
|
||||
hidden_size=self.hid_size,
|
||||
num_layers=self.rnn_layer,
|
||||
batch_first=True,
|
||||
dropout=self.dropout,
|
||||
)
|
||||
self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
|
||||
# self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1)
|
||||
self.att_net = nn.Sequential()
|
||||
self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2)))
|
||||
self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout))
|
||||
self.att_net.add_module('att_act', nn.Tanh())
|
||||
self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False))
|
||||
self.att_net.add_module('att_softmax', nn.Softmax(dim=1))
|
||||
self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)))
|
||||
self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
|
||||
self.att_net.add_module("att_act", nn.Tanh())
|
||||
self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False))
|
||||
self.att_net.add_module("att_softmax", nn.Softmax(dim=1))
|
||||
|
||||
def forward(self, inputs):
|
||||
# inputs: [batch_size, input_size*input_day]
|
||||
inputs = inputs.view(len(inputs), self.input_size, -1)
|
||||
inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
|
||||
rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
|
||||
attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
|
||||
inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
|
||||
rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
|
||||
attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
|
||||
out_att = torch.mul(rnn_out, attention_score)
|
||||
out_att = torch.sum(out_att, dim=1)
|
||||
out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1)) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
|
||||
out = self.fc_out(
|
||||
torch.cat((rnn_out[:, -1, :], out_att), dim=1)
|
||||
) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
|
||||
# out = self.fc_out(rnn_out[:, -1, :] + out_att)
|
||||
return out[..., 0]
|
||||
|
||||
|
||||
@@ -265,12 +265,14 @@ class GAT(Model):
|
||||
self.logger.info("Loading pretrained model...")
|
||||
if self.base_model == "LSTM":
|
||||
from ...contrib.model.pytorch_lstm import LSTMModel
|
||||
|
||||
pretrained_model = LSTMModel()
|
||||
pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
|
||||
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
|
||||
elif self.base_model == "GRU":
|
||||
from ...contrib.model.pytorch_gru import GRUModel
|
||||
|
||||
pretrained_model = GRUModel()
|
||||
pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
|
||||
pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
|
||||
model_dict = self.GAT_model.state_dict()
|
||||
pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
|
||||
model_dict.update(pretrained_dict)
|
||||
|
||||
@@ -78,7 +78,7 @@ class HATS(Model):
|
||||
self.optimizer = optimizer.lower()
|
||||
self.loss = loss
|
||||
self.base_model = base_model
|
||||
self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
|
||||
self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
|
||||
self.visible_GPU = GPU
|
||||
self.use_gpu = torch.cuda.is_available()
|
||||
self.seed = seed
|
||||
@@ -97,7 +97,7 @@ class HATS(Model):
|
||||
"\noptimizer : {}"
|
||||
"\nloss_type : {}"
|
||||
"\nbase_model : {}"
|
||||
"\nwith_pretrain : {}" ##### debug
|
||||
"\nwith_pretrain : {}" ##### debug
|
||||
"\nvisible_GPU : {}"
|
||||
"\nuse_GPU : {}"
|
||||
"\nseed : {}".format(
|
||||
@@ -113,7 +113,7 @@ class HATS(Model):
|
||||
optimizer.lower(),
|
||||
loss,
|
||||
base_model,
|
||||
with_pretrain, ### debug
|
||||
with_pretrain, ### debug
|
||||
GPU,
|
||||
self.use_gpu,
|
||||
seed,
|
||||
@@ -265,12 +265,14 @@ class HATS(Model):
|
||||
self.logger.info("loading pretrained model...")
|
||||
if self.base_model == "LSTM":
|
||||
from ...contrib.model.pytorch_lstm import LSTMModel
|
||||
|
||||
pretrained_model = LSTMModel()
|
||||
pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
|
||||
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
|
||||
elif self.base_model == "GRU":
|
||||
from ...contrib.model.pytorch_gru import GRUModel
|
||||
|
||||
pretrained_model = GRUModel()
|
||||
pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
|
||||
pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
|
||||
model_dict = self.HATS_model.state_dict()
|
||||
|
||||
# filter unnecessary parameters
|
||||
@@ -281,7 +283,6 @@ class HATS(Model):
|
||||
self.HATS_model.load_state_dict(model_dict)
|
||||
self.logger.info("loading pretrained model Done...")
|
||||
|
||||
|
||||
# train
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
@@ -382,22 +383,24 @@ class HATSModel(nn.Module):
|
||||
self.softmax = nn.Softmax(dim=1)
|
||||
self.d_feat = d_feat
|
||||
|
||||
num_head_att = [1]*num_layers
|
||||
hidden_dim = [hidden_size]*num_layers
|
||||
dims = [d_feat] + [d*nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
|
||||
num_head_att = [1] * num_layers
|
||||
hidden_dim = [hidden_size] * num_layers
|
||||
dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
|
||||
in_dims = dims[:-1]
|
||||
out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)]
|
||||
self.attn = nn.ModuleList([GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims,num_head_att)])
|
||||
self.attn = nn.ModuleList(
|
||||
[GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)]
|
||||
)
|
||||
self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]])
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
self.elu = nn.ELU()
|
||||
|
||||
def forward(self, x):
|
||||
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
|
||||
x = x.permute(0, 2, 1) # [N, T, F]
|
||||
out,_ = self.model(x)
|
||||
x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
|
||||
x = x.permute(0, 2, 1) # [N, T, F]
|
||||
out, _ = self.model(x)
|
||||
hidden = out[:, -1, :]
|
||||
hidden = self.bn1(hidden)
|
||||
hidden = self.bn1(hidden)
|
||||
attention = GraphAttention.cal_attention(hidden, hidden)
|
||||
output = attention.mm(hidden)
|
||||
output = self.fc(output)
|
||||
@@ -406,9 +409,7 @@ class HATSModel(nn.Module):
|
||||
return self.fc_out(output).squeeze()
|
||||
|
||||
|
||||
|
||||
class GraphAttention(nn.Module):
|
||||
|
||||
def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):
|
||||
|
||||
super().__init__()
|
||||
@@ -431,7 +432,7 @@ class GraphAttention(nn.Module):
|
||||
self.num_heads = num_heads
|
||||
|
||||
self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
|
||||
self.a = nn.ModuleList([nn.Linear(2*output_dim, 1) for _ in range(num_heads)])
|
||||
self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])
|
||||
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
self.softmax = nn.Softmax(dim=0)
|
||||
@@ -465,7 +466,6 @@ class GraphAttention(nn.Module):
|
||||
sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
|
||||
mapped_nodes = [mapping[v] for v in nodes]
|
||||
indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t()
|
||||
|
||||
|
||||
out = []
|
||||
for k in range(self.num_heads):
|
||||
@@ -477,7 +477,7 @@ class GraphAttention(nn.Module):
|
||||
|
||||
e = self.leakyrelu(self.a[k](cat_h))
|
||||
|
||||
alpha = [self.softmax(e[lo : hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
|
||||
alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
|
||||
alpha = torch.cat(tuple(alpha), dim=0)
|
||||
alpha = alpha.squeeze(1)
|
||||
alpha = self.dropout(alpha)
|
||||
@@ -487,11 +487,18 @@ class GraphAttention(nn.Module):
|
||||
|
||||
return out
|
||||
|
||||
def cal_attention(x, y):
|
||||
|
||||
att_x = torch.mean(x, dim = 1).reshape(-1, 1)
|
||||
att_y = torch.mean(y, dim = 1).reshape(-1, 1)
|
||||
def cal_attention(x, y):
|
||||
|
||||
att_x = torch.mean(x, dim=1).reshape(-1, 1)
|
||||
att_y = torch.mean(y, dim=1).reshape(-1, 1)
|
||||
att = att_x.mm(torch.t(att_y))
|
||||
x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
|
||||
y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
|
||||
return torch.mean(x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)*y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), dim = 2)-att
|
||||
return (
|
||||
torch.mean(
|
||||
x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
|
||||
* y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1),
|
||||
dim=2,
|
||||
)
|
||||
- att
|
||||
)
|
||||
|
||||
@@ -18,7 +18,7 @@ class Dataset(Serializable):
|
||||
|
||||
- setup data
|
||||
- The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
|
||||
|
||||
|
||||
- initialize the state of the dataset(info to prepare the data)
|
||||
- The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
|
||||
|
||||
@@ -99,7 +99,7 @@ class DatasetH(Dataset):
|
||||
Here are some examples:
|
||||
|
||||
.. code-block::
|
||||
|
||||
|
||||
1) 'segments': {
|
||||
'train': ("2008-01-01", "2014-12-31"),
|
||||
'valid': ("2017-01-01", "2020-08-01",),
|
||||
|
||||
Reference in New Issue
Block a user