From a99db6a1dc6afb3df12a7ece52375e535204ae65 Mon Sep 17 00:00:00 2001 From: Jactus Date: Wed, 25 Nov 2020 19:29:30 +0800 Subject: [PATCH] Add ALSTM config --- README.md | 4 +- examples/benchmarks/ALSTM/requirements.txt | 4 ++ .../ALSTM/workflow_config_alstm.yaml | 69 +++++++++++++++++++ examples/workflow_by_code_alstm.py | 4 +- examples/workflow_by_code_hats.py | 2 +- qlib/contrib/evaluate.py | 2 +- qlib/contrib/model/pytorch_alstm.py | 42 +++++------ qlib/contrib/model/pytorch_gats.py | 6 +- qlib/contrib/model/pytorch_hats.py | 55 ++++++++------- qlib/data/dataset/__init__.py | 4 +- 10 files changed, 139 insertions(+), 53 deletions(-) create mode 100644 examples/benchmarks/ALSTM/requirements.txt create mode 100644 examples/benchmarks/ALSTM/workflow_config_alstm.yaml diff --git a/README.md b/README.md index 4383dea26..cd0c8542f 100644 --- a/README.md +++ b/README.md @@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`. - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py) - [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py) - [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py) +- [ALSTM based on pytorch](qlib/contrib/model/pytorch_alstm.py) - [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py) - [TabNet based on pytorch](qlib/contrib/model/tabnet.py) - [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py) - +- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py) +- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) Your PR of new Quant models is highly welcomed. 
diff --git a/examples/benchmarks/ALSTM/requirements.txt b/examples/benchmarks/ALSTM/requirements.txt new file mode 100644 index 000000000..1fc2779c0 --- /dev/null +++ b/examples/benchmarks/ALSTM/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.17.4 +pandas==1.1.2 +scikit_learn==0.23.2 +torch==1.7.0 diff --git a/examples/benchmarks/ALSTM/workflow_config_alstm.yaml b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml new file mode 100644 index 000000000..bb35b6da5 --- /dev/null +++ b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml @@ -0,0 +1,69 @@ +provider_uri: "~/.qlib/qlib_data/cn_data" +region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy.strategy + kwargs: + topk: 50 + n_drop: 5 + backtest: + verbose: False + limit_threshold: 0.095 + account: 100000000 + benchmark: *benchmark + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: ALSTM + module_path: qlib.contrib.model.pytorch_alstm + kwargs: + d_feat: 6 + hidden_size: 64 + num_layers: 2 + dropout: 0.0 + n_epochs: 200 + lr: 1e-3 + early_stop: 20 + batch_size: 800 + metric: IC + loss: mse + seed: 0 + GPU: 0 + rnn_type: GRU + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: ALPHA360_Denoise + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: {} + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: 
qlib.workflow.record_temp + kwargs: + config: *port_analysis_config \ No newline at end of file diff --git a/examples/workflow_by_code_alstm.py b/examples/workflow_by_code_alstm.py index 3137b6605..eabce3b07 100644 --- a/examples/workflow_by_code_alstm.py +++ b/examples/workflow_by_code_alstm.py @@ -74,7 +74,7 @@ if __name__ == "__main__": "loss": "mse", "seed": 0, "GPU": 0, - "rnn_type": "GRU" + "rnn_type": "GRU", }, }, "dataset": { @@ -142,4 +142,4 @@ if __name__ == "__main__": report_normal["return"] - report_normal["bench"] - report_normal["cost"] ) analysis_df = pd.concat(analysis) # type: pd.DataFrame - print(analysis_df) \ No newline at end of file + print(analysis_df) diff --git a/examples/workflow_by_code_hats.py b/examples/workflow_by_code_hats.py index 0cba29b63..3ea81ba49 100644 --- a/examples/workflow_by_code_hats.py +++ b/examples/workflow_by_code_hats.py @@ -100,7 +100,7 @@ if __name__ == "__main__": # model = train_model(task) model = init_instance_by_config(task["model"]) dataset = init_instance_by_config(task["dataset"]) - model.fit(dataset,save_path='benchmarks/HATS/model_hat.pkl') + model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl") pred_score = model.predict(dataset) diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py index cf1793c93..2b85f1a9b 100644 --- a/qlib/contrib/evaluate.py +++ b/qlib/contrib/evaluate.py @@ -228,7 +228,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy - **exchange related arguments** - + exchange: Exchange() pass the exchange for speeding up. 
subscribe_fields: list diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index b302925ec..bdf1e3ea0 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -345,7 +345,6 @@ class GRUModel(nn.Module): return self.fc_out(out[:, -1, :]).squeeze() - class ALSTMModel(nn.Module): def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"): super().__init__() @@ -360,33 +359,36 @@ class ALSTMModel(nn.Module): try: klass = getattr(nn, self.rnn_type.upper()) except: - raise ValueError('unknown rnn_type `%s`' % self.rnn_type) + raise ValueError("unknown rnn_type `%s`" % self.rnn_type) self.net = nn.Sequential() - self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size)) - self.net.add_module('act', nn.Tanh()) - self.rnn = klass(input_size=self.hid_size, - hidden_size=self.hid_size, - num_layers=self.rnn_layer, - batch_first=True, - dropout=self.dropout) - self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1) + self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size)) + self.net.add_module("act", nn.Tanh()) + self.rnn = klass( + input_size=self.hid_size, + hidden_size=self.hid_size, + num_layers=self.rnn_layer, + batch_first=True, + dropout=self.dropout, + ) + self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1) # self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1) self.att_net = nn.Sequential() - self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2))) - self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout)) - self.att_net.add_module('att_act', nn.Tanh()) - self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False)) - self.att_net.add_module('att_softmax', nn.Softmax(dim=1)) + self.att_net.add_module("att_fc_in", 
nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2))) + self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout)) + self.att_net.add_module("att_act", nn.Tanh()) + self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False)) + self.att_net.add_module("att_softmax", nn.Softmax(dim=1)) def forward(self, inputs): # inputs: [batch_size, input_size*input_day] inputs = inputs.view(len(inputs), self.input_size, -1) - inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size] - rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size] - attention_score = self.att_net(rnn_out) # [batch, seq_len, 1] + inputs = inputs.permute(0, 2, 1) # [batch, input_size, seq_len] -> [batch, seq_len, input_size] + rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size] + attention_score = self.att_net(rnn_out) # [batch, seq_len, 1] out_att = torch.mul(rnn_out, attention_score) out_att = torch.sum(out_att, dim=1) - out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1)) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1] + out = self.fc_out( + torch.cat((rnn_out[:, -1, :], out_att), dim=1) + ) # [batch, seq_len, num_directions * hidden_size] -> [batch, 1] # out = self.fc_out(rnn_out[:, -1, :] + out_att) return out[..., 0] - diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 77e3b9de9..07af4eda4 100755 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -265,12 +265,14 @@ class GAT(Model): self.logger.info("Loading pretrained model...") if self.base_model == "LSTM": from ...contrib.model.pytorch_lstm import LSTMModel + pretrained_model = LSTMModel() - pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl')) + 
pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl")) elif self.base_model == "GRU": from ...contrib.model.pytorch_gru import GRUModel + pretrained_model = GRUModel() - pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl')) + pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl")) model_dict = self.GAT_model.state_dict() pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict} model_dict.update(pretrained_dict) diff --git a/qlib/contrib/model/pytorch_hats.py b/qlib/contrib/model/pytorch_hats.py index 6a09e685b..7b4307e25 100644 --- a/qlib/contrib/model/pytorch_hats.py +++ b/qlib/contrib/model/pytorch_hats.py @@ -78,7 +78,7 @@ class HATS(Model): self.optimizer = optimizer.lower() self.loss = loss self.base_model = base_model - self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model + self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model self.visible_GPU = GPU self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -97,7 +97,7 @@ class HATS(Model): "\noptimizer : {}" "\nloss_type : {}" "\nbase_model : {}" - "\nwith_pretrain : {}" ##### debug + "\nwith_pretrain : {}" ##### debug "\nvisible_GPU : {}" "\nuse_GPU : {}" "\nseed : {}".format( @@ -113,7 +113,7 @@ class HATS(Model): optimizer.lower(), loss, base_model, - with_pretrain, ### debug + with_pretrain, ### debug GPU, self.use_gpu, seed, @@ -265,12 +265,14 @@ class HATS(Model): self.logger.info("loading pretrained model...") if self.base_model == "LSTM": from ...contrib.model.pytorch_lstm import LSTMModel + pretrained_model = LSTMModel() - pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl')) + pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl")) elif self.base_model == "GRU": from ...contrib.model.pytorch_gru import GRUModel + pretrained_model = GRUModel() - 
pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl')) + pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl")) model_dict = self.HATS_model.state_dict() # filter unnecessary parameters @@ -281,7 +283,6 @@ class HATS(Model): self.HATS_model.load_state_dict(model_dict) self.logger.info("loading pretrained model Done...") - # train self.logger.info("training...") self._fitted = True @@ -382,22 +383,24 @@ class HATSModel(nn.Module): self.softmax = nn.Softmax(dim=1) self.d_feat = d_feat - num_head_att = [1]*num_layers - hidden_dim = [hidden_size]*num_layers - dims = [d_feat] + [d*nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]] + num_head_att = [1] * num_layers + hidden_dim = [hidden_size] * num_layers + dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]] in_dims = dims[:-1] out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)] - self.attn = nn.ModuleList([GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims,num_head_att)]) + self.attn = nn.ModuleList( + [GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)] + ) self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]]) self.dropout = nn.Dropout(dropout) self.elu = nn.ELU() def forward(self, x): - x = x.reshape(len(x), self.d_feat, -1) # [N, F, T] - x = x.permute(0, 2, 1) # [N, T, F] - out,_ = self.model(x) + x = x.reshape(len(x), self.d_feat, -1) # [N, F, T] + x = x.permute(0, 2, 1) # [N, T, F] + out, _ = self.model(x) hidden = out[:, -1, :] - hidden = self.bn1(hidden) + hidden = self.bn1(hidden) attention = GraphAttention.cal_attention(hidden, hidden) output = attention.mm(hidden) output = self.fc(output) @@ -406,9 +409,7 @@ class HATSModel(nn.Module): return self.fc_out(output).squeeze() - class GraphAttention(nn.Module): - def __init__(self, input_dim, output_dim, num_heads, dropout=0.5): super().__init__() @@ 
-431,7 +432,7 @@ class GraphAttention(nn.Module): self.num_heads = num_heads self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)]) - self.a = nn.ModuleList([nn.Linear(2*output_dim, 1) for _ in range(num_heads)]) + self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)]) self.dropout = nn.Dropout(dropout) self.softmax = nn.Softmax(dim=0) @@ -465,7 +466,6 @@ class GraphAttention(nn.Module): sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows]))) mapped_nodes = [mapping[v] for v in nodes] indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t() - out = [] for k in range(self.num_heads): @@ -477,7 +477,7 @@ class GraphAttention(nn.Module): e = self.leakyrelu(self.a[k](cat_h)) - alpha = [self.softmax(e[lo : hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])] + alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])] alpha = torch.cat(tuple(alpha), dim=0) alpha = alpha.squeeze(1) alpha = self.dropout(alpha) @@ -487,11 +487,18 @@ class GraphAttention(nn.Module): return out - def cal_attention(x, y): - - att_x = torch.mean(x, dim = 1).reshape(-1, 1) - att_y = torch.mean(y, dim = 1).reshape(-1, 1) + def cal_attention(x, y): + + att_x = torch.mean(x, dim=1).reshape(-1, 1) + att_y = torch.mean(y, dim=1).reshape(-1, 1) att = att_x.mm(torch.t(att_y)) x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1) y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1) - return torch.mean(x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)*y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), dim = 2)-att \ No newline at end of file + return ( + torch.mean( + x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1) + * y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), + dim=2, + ) + - att + ) diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py index 3dbc17c23..e972aba3c 100644 
--- a/qlib/data/dataset/__init__.py +++ b/qlib/data/dataset/__init__.py @@ -18,7 +18,7 @@ class Dataset(Serializable): - setup data - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing - + - initialize the state of the dataset(info to prepare the data) - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing. @@ -99,7 +99,7 @@ class DatasetH(Dataset): Here are some examples: .. code-block:: - + 1) 'segments': { 'train': ("2008-01-01", "2014-12-31"), 'valid': ("2017-01-01", "2020-08-01",),