Add ALSTM config

2026-07-21 11:17:34 +08:00 · 2020-11-25 19:29:30 +08:00
parent 05599d1de8
commit a99db6a1dc
10 changed files with 139 additions and 53 deletions
--- a/README.md
+++ b/README.md
@@ -196,10 +196,12 @@ Here is a list of models built on `Qlib`.
 - [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
 - [GRU based on pytorch](qlib/contrib/model/pytorch_gru.py)
 - [LSTM based on pytorcn](qlib/contrib/model/pytorch_lstm.py)
+- [ALSTM based on pytorch](qlib/contrib/model/pytorch_alstm.py)
 - [GATs based on pytorch](qlib/contrib/model/pytorch_gats.py)
 - [TabNet based on pytorch](qlib/contrib/model/tabnet.py)
 - [SFM based on pytorch](qlib/contrib/model/pytorch_sfm.py)
-<!-- - [TFT based on tensorflow](examples/benchmarks/TFT/tft.py) -->
+- [HATs based on pytorch](qlib/contrib/model/pytorch_hats.py)
+- [TFT based on tensorflow](examples/benchmarks/TFT/tft.py)

 Your PR of new Quant models is highly welcomed.

--- a/examples/benchmarks/ALSTM/requirements.txt
+++ b/examples/benchmarks/ALSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm.yaml
@@ -0,0 +1,69 @@
+provider_uri: "~/.qlib/qlib_data/cn_data"
+region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: IC
+            loss: mse
+            seed: 0
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: ALPHA360_Denoise
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/workflow_by_code_alstm.py
+++ b/examples/workflow_by_code_alstm.py
@@ -74,7 +74,7 @@ if __name__ == "__main__":
                "loss": "mse",
                "seed": 0,
                "GPU": 0,
-                "rnn_type": "GRU"
+                "rnn_type": "GRU",
            },
        },
        "dataset": {
@@ -142,4 +142,4 @@ if __name__ == "__main__":
        report_normal["return"] - report_normal["bench"] - report_normal["cost"]
    )
    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
-    print(analysis_df)
+    print(analysis_df)
--- a/examples/workflow_by_code_hats.py
+++ b/examples/workflow_by_code_hats.py
@@ -100,7 +100,7 @@ if __name__ == "__main__":
    # model = train_model(task)
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])
-    model.fit(dataset,save_path='benchmarks/HATS/model_hat.pkl')
+    model.fit(dataset, save_path="benchmarks/HATS/model_hat.pkl")

    pred_score = model.predict(dataset)

--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -228,7 +228,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy

    - **exchange related arguments**
-    
+
    exchange: Exchange()
        pass the exchange for speeding up.
    subscribe_fields: list
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -345,7 +345,6 @@ class GRUModel(nn.Module):
        return self.fc_out(out[:, -1, :]).squeeze()


-
 class ALSTMModel(nn.Module):
    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, rnn_type="GRU"):
        super().__init__()
@@ -360,33 +359,36 @@ class ALSTMModel(nn.Module):
        try:
            klass = getattr(nn, self.rnn_type.upper())
        except:
-            raise ValueError('unknown rnn_type `%s`' % self.rnn_type)
+            raise ValueError("unknown rnn_type `%s`" % self.rnn_type)
        self.net = nn.Sequential()
-        self.net.add_module('fc_in', nn.Linear(in_features=self.input_size, out_features=self.hid_size))
-        self.net.add_module('act', nn.Tanh())
-        self.rnn = klass(input_size=self.hid_size,
-                         hidden_size=self.hid_size,
-                         num_layers=self.rnn_layer,
-                         batch_first=True,
-                         dropout=self.dropout)
-        self.fc_out = nn.Linear(in_features=self.hid_size*2, out_features=1)
+        self.net.add_module("fc_in", nn.Linear(in_features=self.input_size, out_features=self.hid_size))
+        self.net.add_module("act", nn.Tanh())
+        self.rnn = klass(
+            input_size=self.hid_size,
+            hidden_size=self.hid_size,
+            num_layers=self.rnn_layer,
+            batch_first=True,
+            dropout=self.dropout,
+        )
+        self.fc_out = nn.Linear(in_features=self.hid_size * 2, out_features=1)
        # self.fc_out = nn.Linear(in_features=self.hid_size, out_features=1)
        self.att_net = nn.Sequential()
-        self.att_net.add_module('att_fc_in', nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size/2)))
-        self.att_net.add_module('att_dropout', torch.nn.Dropout(self.dropout))
-        self.att_net.add_module('att_act', nn.Tanh())
-        self.att_net.add_module('att_fc_out', nn.Linear(in_features=int(self.hid_size/2), out_features=1, bias=False))
-        self.att_net.add_module('att_softmax', nn.Softmax(dim=1))
+        self.att_net.add_module("att_fc_in", nn.Linear(in_features=self.hid_size, out_features=int(self.hid_size / 2)))
+        self.att_net.add_module("att_dropout", torch.nn.Dropout(self.dropout))
+        self.att_net.add_module("att_act", nn.Tanh())
+        self.att_net.add_module("att_fc_out", nn.Linear(in_features=int(self.hid_size / 2), out_features=1, bias=False))
+        self.att_net.add_module("att_softmax", nn.Softmax(dim=1))

    def forward(self, inputs):
        # inputs: [batch_size, input_size*input_day]
        inputs = inputs.view(len(inputs), self.input_size, -1)
-        inputs = inputs.permute(0, 2, 1)        # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
-        rnn_out, _ = self.rnn(self.net(inputs)) # [batch, seq_len, num_directions * hidden_size]
-        attention_score = self.att_net(rnn_out) # [batch, seq_len, 1]
+        inputs = inputs.permute(0, 2, 1)  # [batch, input_size, seq_len] -> [batch, seq_len, input_size]
+        rnn_out, _ = self.rnn(self.net(inputs))  # [batch, seq_len, num_directions * hidden_size]
+        attention_score = self.att_net(rnn_out)  # [batch, seq_len, 1]
        out_att = torch.mul(rnn_out, attention_score)
        out_att = torch.sum(out_att, dim=1)
-        out = self.fc_out(torch.cat((rnn_out[:, -1, :], out_att), dim=1))  # [batch, seq_len, num_directions * hidden_size] -> [batch, 1]
+        out = self.fc_out(
+            torch.cat((rnn_out[:, -1, :], out_att), dim=1)
+        )  # [batch, 2 * hidden_size] (last step concatenated with attention-pooled output) -> [batch, 1]
        # out = self.fc_out(rnn_out[:, -1, :] + out_att)
        return out[..., 0]
-
--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -265,12 +265,14 @@ class GAT(Model):
            self.logger.info("Loading pretrained model...")
            if self.base_model == "LSTM":
                from ...contrib.model.pytorch_lstm import LSTMModel
+
                pretrained_model = LSTMModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
            elif self.base_model == "GRU":
                from ...contrib.model.pytorch_gru import GRUModel
+
                pretrained_model = GRUModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
            model_dict = self.GAT_model.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}
            model_dict.update(pretrained_dict)
--- a/qlib/contrib/model/pytorch_hats.py
+++ b/qlib/contrib/model/pytorch_hats.py
@@ -78,7 +78,7 @@ class HATS(Model):
        self.optimizer = optimizer.lower()
        self.loss = loss
        self.base_model = base_model
-        self.with_pretrain = with_pretrain #### True if train HATS with pretrained base model
+        self.with_pretrain = with_pretrain  #### True to train HATS starting from a pretrained base model
        self.visible_GPU = GPU
        self.use_gpu = torch.cuda.is_available()
        self.seed = seed
@@ -97,7 +97,7 @@ class HATS(Model):
            "\noptimizer : {}"
            "\nloss_type : {}"
            "\nbase_model : {}"
-            "\nwith_pretrain : {}" ##### debug
+            "\nwith_pretrain : {}"  ##### debug
            "\nvisible_GPU : {}"
            "\nuse_GPU : {}"
            "\nseed : {}".format(
@@ -113,7 +113,7 @@ class HATS(Model):
                optimizer.lower(),
                loss,
                base_model,
-                with_pretrain, ### debug
+                with_pretrain,  ### debug
                GPU,
                self.use_gpu,
                seed,
@@ -265,12 +265,14 @@ class HATS(Model):
            self.logger.info("loading pretrained model...")
            if self.base_model == "LSTM":
                from ...contrib.model.pytorch_lstm import LSTMModel
+
                pretrained_model = LSTMModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/LSTM/model_lstm_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl"))
            elif self.base_model == "GRU":
                from ...contrib.model.pytorch_gru import GRUModel
+
                pretrained_model = GRUModel()
-                pretrained_model.load_state_dict(torch.load('benchmarks/GRU/model_gru_csi300.pkl'))
+                pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl"))
            model_dict = self.HATS_model.state_dict()

            # filter unnecessary parameters
@@ -281,7 +283,6 @@ class HATS(Model):
            self.HATS_model.load_state_dict(model_dict)
            self.logger.info("loading pretrained model Done...")

-
        # train
        self.logger.info("training...")
        self._fitted = True
@@ -382,22 +383,24 @@ class HATSModel(nn.Module):
        self.softmax = nn.Softmax(dim=1)
        self.d_feat = d_feat

-        num_head_att = [1]*num_layers
-        hidden_dim = [hidden_size]*num_layers
-        dims = [d_feat] + [d*nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
+        num_head_att = [1] * num_layers
+        hidden_dim = [hidden_size] * num_layers
+        dims = [d_feat] + [d * nh for (d, nh) in zip(hidden_dim, num_head_att[:-1])] + [num_head_att[-1]]
        in_dims = dims[:-1]
        out_dims = [d // nh for (d, nh) in zip(dims[1:], num_head_att)]
-        self.attn = nn.ModuleList([GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims,num_head_att)])
+        self.attn = nn.ModuleList(
+            [GraphAttention(i, o, nh, dropout) for (i, o, nh) in zip(in_dims, out_dims, num_head_att)]
+        )
        self.bns = nn.ModuleList([nn.BatchNorm1d(dim) for dim in dims[1:-1]])
        self.dropout = nn.Dropout(dropout)
        self.elu = nn.ELU()

    def forward(self, x):
-        x = x.reshape(len(x), self.d_feat, -1) # [N, F, T]
-        x = x.permute(0, 2, 1) # [N, T, F]
-        out,_ = self.model(x)
+        x = x.reshape(len(x), self.d_feat, -1)  # [N, F, T]
+        x = x.permute(0, 2, 1)  # [N, T, F]
+        out, _ = self.model(x)
        hidden = out[:, -1, :]
-        hidden = self.bn1(hidden) 
+        hidden = self.bn1(hidden)
        attention = GraphAttention.cal_attention(hidden, hidden)
        output = attention.mm(hidden)
        output = self.fc(output)
@@ -406,9 +409,7 @@ class HATSModel(nn.Module):
        return self.fc_out(output).squeeze()


-
 class GraphAttention(nn.Module):
-
    def __init__(self, input_dim, output_dim, num_heads, dropout=0.5):

        super().__init__()
@@ -431,7 +432,7 @@ class GraphAttention(nn.Module):
        self.num_heads = num_heads

        self.fcs = nn.ModuleList([nn.Linear(input_dim, output_dim) for _ in range(num_heads)])
-        self.a = nn.ModuleList([nn.Linear(2*output_dim, 1) for _ in range(num_heads)])
+        self.a = nn.ModuleList([nn.Linear(2 * output_dim, 1) for _ in range(num_heads)])

        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.Softmax(dim=0)
@@ -465,7 +466,6 @@ class GraphAttention(nn.Module):
        sum_degs = np.hstack(([0], np.cumsum([len(row) for row in rows])))
        mapped_nodes = [mapping[v] for v in nodes]
        indices = torch.LongTensor([[v, c] for (v, row) in zip(mapped_nodes, rows) for c in row]).t()
-        

        out = []
        for k in range(self.num_heads):
@@ -477,7 +477,7 @@ class GraphAttention(nn.Module):

            e = self.leakyrelu(self.a[k](cat_h))

-            alpha = [self.softmax(e[lo : hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
+            alpha = [self.softmax(e[lo:hi]) for (lo, hi) in zip(sum_degs, sum_degs[1:])]
            alpha = torch.cat(tuple(alpha), dim=0)
            alpha = alpha.squeeze(1)
            alpha = self.dropout(alpha)
@@ -487,11 +487,18 @@ class GraphAttention(nn.Module):

        return out

-    def cal_attention(x, y): 
-     
-        att_x = torch.mean(x, dim = 1).reshape(-1, 1)
-        att_y = torch.mean(y, dim = 1).reshape(-1, 1)
+    def cal_attention(x, y):
+
+        att_x = torch.mean(x, dim=1).reshape(-1, 1)
+        att_y = torch.mean(y, dim=1).reshape(-1, 1)
        att = att_x.mm(torch.t(att_y))
        x_att = x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
        y_att = y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1)
-        return torch.mean(x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)*y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1), dim = 2)-att
+        return (
+            torch.mean(
+                x.reshape(x.shape[0], 1, x.shape[1]).repeat(1, y.shape[0], 1)
+                * y.reshape(1, y.shape[0], y.shape[1]).repeat(x.shape[0], 1, 1),
+                dim=2,
+            )
+            - att
+        )
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -18,7 +18,7 @@ class Dataset(Serializable):

        - setup data
            - The data related attributes' names should start with '_' so that it will not be saved on disk when serializing
-        
+
        - initialize the state of the dataset(info to prepare the data)
            - The name of essential state for preparing data should not start with '_' so that it could be serialized on disk when serializing.

@@ -99,7 +99,7 @@ class DatasetH(Dataset):
            Here are some examples:

            .. code-block::
-            
+
                1) 'segments': {
                        'train': ("2008-01-01", "2014-12-31"),
                        'valid': ("2017-01-01", "2020-08-01",),