From a99db6a1dc6afb3df12a7ece52375e535204ae65 Mon Sep 17 00:00:00 2001
From: Jactus <dw1920@nyu.edu>
Date: Wed, 25 Nov 2020 19:29:30 +0800
Subject: [PATCH] Add ALSTM config

---
 README.md                                     |  4 +-
 examples/benchmarks/ALSTM/requirements.txt    |  4 ++
 .../ALSTM/workflow_config_alstm.yaml          | 69 +++++++++++++++++++
 examples/workflow_by_code_alstm.py            |  4 +-
 examples/workflow_by_code_hats.py             |  2 +-
 qlib/contrib/evaluate.py                      |  2 +-
 qlib/contrib/model/pytorch_alstm.py           | 42 +++++------
 qlib/contrib/model/pytorch_gats.py            |  6 +-
 qlib/contrib/model/pytorch_hats.py            | 55 ++++++++-------
 qlib/data/dataset/__init__.py                 |  4 +-
 10 files changed, 139 insertions(+), 53 deletions(-)
 create mode 100644 examples/benchmarks/ALSTM/requirements.txt
 create mode 100644 examples/benchmarks/ALSTM/workflow_config_alstm.yaml

diff --git a/README.md b/README.md
index 4383dea26..cd0c8542f 100644
--- a/README.md
+++ b/README.md
@@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`.
 - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
 - [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
 - [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py)
+- [ALSTM based on pytorch](qlib/contrib/model/pytorch_alstm.py)
 - [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
 - [TabNet based on pytorch](qlib/contrib/model/tabnet.py)
 - [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
-<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
+- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py)
+- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py)
 
 Your PR of new Quant models is highly welcomed.
 
diff --git a/examples/benchmarks/ALSTM/requirements.txt b/examples/benchmarks/ALSTM/requirements.txt
new file mode 100644
index 000000000..1fc2779c0
--- /dev/null
+++ b/examples/benchmarks/ALSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
diff --git a/examples/benchmarks/ALSTM/workflow_config_alstm.yaml b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
new file mode 100644
index 000000000..bb35b6da5
--- /dev/null
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
@@ -0,0 +1,69 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            seed: 0
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
\ No newline at end of file
diff --git a/examples/workflow_by_code_alstm.py b/examples/workflow_by_code_alstm.py
index 3137b6605..eabce3b07 100644
--- a/examples/workflow_by_code_alstm.py
+++ b/examples/workflow_by_code_alstm.py
@@ -74,7 +74,7 @@ if __name__ == "__main__":
                 "loss": "mse",
                 "seed": 0,
                 "GPU": 0,
-                "rnn_type": "GRU"
+                "rnn_type": "GRU",
             },
         },
         "dataset": {
@@ -142,4 +142,4 @@ if __name__ == "__main__":
         report_normal["return"] - report_normal["bench"] - report_normal["cost"]
     )
     analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
\ No newline at end of file
+    print(analysis_df)
diff --git a/examples/workflow_by_code_hats.py b/examples/workflow_by_code_hats.py
index 0cba29b63..3ea81ba49 100644
--- a/examples/workflow_by_code_hats.py
+++ b/examples/workflow_by_code_hats.py
@@ -100,7 +100,7 @@ if __name__ == "__main__":
     # model = train_model(task)
     model = init_instance_by_config(task["model"])
     dataset = init_instance_by_config(task["dataset"])
-    model.fit(dataset,save_path='benchmarks/HATS/model_hat.pkl')
+    model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl")
 
     pred_score = model.predict(dataset)
 
diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py
index cf1793c93..2b85f1a9b 100644
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -228,7 +228,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
         strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy
 
     - **exchange related arguments**
-    
+
     exchange: Exchange()
         pass the exchange for speeding up.
     subscribe_fields: list
diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py
index b302925ec..bdf1e3ea0 100644
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -345,7 +345,6 @@ class GRUModel(nn.Module):
         return self.fc_out(out[:, -1, :]).squeeze()
 
 
-
 class ALSTMModel(nn.Module):
     def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
         super().__init__()
@@ -360,33 +359,36 @@ class ALSTMModel(nn.Module):
         try:
             klass = getattr(nn, self.rnn_type.upper())
         except:
-            raise ValueError('unknown rnn_type `%s`' % self.rnn_type)
+            raise ValueError("unknown rnn_type `%s`" % self.rnn_type)
         self.net = nn.Sequential()
-        self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size))
-        self.net.add_module('act', nn.Tanh())
-        self.rnn = klass(input_size=self.hid_size,
-                         hidden_size=self.hid_size,
-                         num_layers=self.rnn_layer,
-                         batch_first=True,
-                         dropout=self.dropout)
-        self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1)
+        self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
+        self.net.add_module("act", nn.Tanh())
+        self.rnn = klass(
+            input_size=self.hid_size,
+            hidden_size=self.hid_size,
+            num_layers=self.rnn_layer,
+            batch_first=True,
+            dropout=self.dropout,
+        )
+        self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
         # self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1)
         self.att_net = nn.Sequential()
-        self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2)))
-        self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout))
-        self.att_net.add_module('att_act', nn.Tanh())
-        self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False))
-        self.att_net.add_module('att_softmax', nn.Softmax(dim=1))
+        self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)))
+        self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
+        self.att_net.add_module("att_act", nn.Tanh())
+        self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False))
+        self.att_net.add_module("att_softmax", nn.Softmax(dim=1))
 
     def forward(self, inputs):
         # inputs: [batch_size, input_size*input_day]
         inputs = inputs.view(len(inputs), self.input_size, -1)
-        inputs = inputs.permute(0, 2, 1)        # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
-        rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
-        attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
+        inputs = inputs.permute(0, 2, 1)  # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
+        rnn_out, _ = self.rnn(self.net(inputs))  # [batch, seq_len, num_directions * hidden_size]
+        attention_score = self.att_net(rnn_out)  # [batch, seq_len, 1]
         out_att = torch.mul(rnn_out, attention_score)
         out_att = torch.sum(out_att, dim=1)
-        out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1))  # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
+        out = self.fc_out(
+            torch.cat((rnn_out[:, -1, :], out_att), dim=1)
+        )  # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
         # out = self.fc_out(rnn_out[:, -1, :] + out_att)
         return out[..., 0]
-
diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py
index 77e3b9de9..07af4eda4 100755
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -265,12 +265,14 @@ class GAT(Model):
             self.logger.info("Loading pretrained model...")
             if self.base_model == "LSTM":
                 from ...contrib.model.pytorch_lstm import LSTMModel
+
                 pretrained_model = LSTMModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
             elif self.base_model == "GRU":
                 from ...contrib.model.pytorch_gru import GRUModel
+
                 pretrained_model = GRUModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
             model_dict = self.GAT_model.state_dict()
             pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
             model_dict.update(pretrained_dict)
diff --git a/qlib/contrib/model/pytorch_hats.py b/qlib/contrib/model/pytorch_hats.py
index 6a09e685b..7b4307e25 100644
--- a/qlib/contrib/model/pytorch_hats.py
+++ b/qlib/contrib/model/pytorch_hats.py
@@ -78,7 +78,7 @@ class HATS(Model):
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.base_model = base_model
-        self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
+        self.with_pretrain = with_pretrain  #### True if train HATS with pretrained base model
         self.visible_GPU = GPU
         self.use_gpu = torch.cuda.is_available()
         self.seed = seed
@@ -97,7 +97,7 @@ class HATS(Model):
             "\noptimizer : {}"
             "\nloss_type : {}"
             "\nbase_model : {}"
-            "\nwith_pretrain : {}" ##### debug
+            "\nwith_pretrain : {}"  ##### debug
             "\nvisible_GPU : {}"
             "\nuse_GPU : {}"
             "\nseed : {}".format(
@@ -113,7 +113,7 @@ class HATS(Model):
                 optimizer.lower(),
                 loss,
                 base_model,
-                with_pretrain, ### debug
+                with_pretrain,  ### debug
                 GPU,
                 self.use_gpu,
                 seed,
@@ -265,12 +265,14 @@ class HATS(Model):
             self.logger.info("loading pretrained model...")
             if self.base_model == "LSTM":
                 from ...contrib.model.pytorch_lstm import LSTMModel
+
                 pretrained_model = LSTMModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
             elif self.base_model == "GRU":
                 from ...contrib.model.pytorch_gru import GRUModel
+
                 pretrained_model = GRUModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
             model_dict = self.HATS_model.state_dict()
 
             # filter unnecessary parameters
@@ -281,7 +283,6 @@ class HATS(Model):
             self.HATS_model.load_state_dict(model_dict)
             self.logger.info("loading pretrained model Done...")
 
-
         # train
         self.logger.info("training...")
         self._fitted = True
@@ -382,22 +383,24 @@ class HATSModel(nn.Module):
         self.softmax = nn.Softmax(dim=1)
         self.d_feat = d_feat
 
-        num_head_att = [1]*num_layers
-        hidden_dim = [hidden_size]*num_layers
-        dims = [d_feat] + [d*nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
+        num_head_att = [1] * num_layers
+        hidden_dim = [hidden_size] * num_layers
+        dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
         in_dims = dims[:-1]
         out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)]
-        self.attn = nn.ModuleList([GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims,num_head_att)])
+        self.attn = nn.ModuleList(
+            [GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)]
+        )
         self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]])
         self.dropout = nn.Dropout(dropout)
         self.elu = nn.ELU()
 
     def forward(self, x):
-        x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
-        x = x.permute(0, 2, 1) # [N, T, F]
-        out,_ = self.model(x)
+        x = x.reshape(len(x), self.d_feat, -1)  # [N, F, T]
+        x = x.permute(0, 2, 1)  # [N, T, F]
+        out, _ = self.model(x)
         hidden = out[:, -1, :]
-        hidden = self.bn1(hidden) 
+        hidden = self.bn1(hidden)
         attention = GraphAttention.cal_attention(hidden, hidden)
         output = attention.mm(hidden)
         output = self.fc(output)
@@ -406,9 +409,7 @@ class HATSModel(nn.Module):
         return self.fc_out(output).squeeze()
 
 
-
 class GraphAttention(nn.Module):
-
     def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):
 
         super().__init__()
@@ -431,7 +432,7 @@ class GraphAttention(nn.Module):
         self.num_heads = num_heads
 
         self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
-        self.a = nn.ModuleList([nn.Linear(2*output_dim, 1) for _ in range(num_heads)])
+        self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])
 
         self.dropout = nn.Dropout(dropout)
         self.softmax = nn.Softmax(dim=0)
@@ -465,7 +466,6 @@ class GraphAttention(nn.Module):
         sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
         mapped_nodes = [mapping[v] for v in nodes]
         indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t()
-        
 
         out = []
         for k in range(self.num_heads):
@@ -477,7 +477,7 @@ class GraphAttention(nn.Module):
 
             e = self.leakyrelu(self.a[k](cat_h))
 
-            alpha = [self.softmax(e[lo : hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
+            alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
             alpha = torch.cat(tuple(alpha), dim=0)
             alpha = alpha.squeeze(1)
             alpha = self.dropout(alpha)
@@ -487,11 +487,18 @@ class GraphAttention(nn.Module):
 
         return out
 
-    def cal_attention(x, y): 
-     
-        att_x = torch.mean(x, dim = 1).reshape(-1, 1)
-        att_y = torch.mean(y, dim = 1).reshape(-1, 1)
+    def cal_attention(x, y):
+
+        att_x = torch.mean(x, dim=1).reshape(-1, 1)
+        att_y = torch.mean(y, dim=1).reshape(-1, 1)
         att = att_x.mm(torch.t(att_y))
         x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
         y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
-        return torch.mean(x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)*y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), dim = 2)-att
\ No newline at end of file
+        return (
+            torch.mean(
+                x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
+                * y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1),
+                dim=2,
+            )
+            - att
+        )
diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py
index 3dbc17c23..e972aba3c 100644
--- a/qlib/data/dataset/__init__.py
+++ b/qlib/data/dataset/__init__.py
@@ -18,7 +18,7 @@ class Dataset(Serializable):
 
         - setup data
             - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
-        
+
         - initialize the state of the dataset(info to prepare the data)
             - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.
 
@@ -99,7 +99,7 @@ class DatasetH(Dataset):
             Here are some examples:
 
             .. code-block::
-            
+
                 1) 'segments': {
                         'train': ("2008-01-01", "2014-12-31"),
                         'valid': ("2017-01-01", "2020-08-01",),