diff --git a/examples/trade/README.md b/examples/trade/README.md index 8add4ecf0..c29f439ab 100644 --- a/examples/trade/README.md +++ b/examples/trade/README.md @@ -1,5 +1,5 @@ # Universal Trading for Order Execution with Oracle Policy Distillation -This is the experiment code for our AAAI 2021 paper "[Universal Trading for Order Execution with Oracle Policy Distillation](https://seqml.github.io/opd/opd_aaai21.pdf)", including the implementations of all the compared methods in the paper and a general reinforcement learning framework for order execution in quantitative finance. +This is the experiment code for our AAAI 2021 paper "[Universal Trading for Order Execution with Oracle Policy Distillation](https://arxiv.org/abs/2103.10860)", including the implementations of all the compared methods in the paper and a general reinforcement learning framework for order execution in quantitative finance. ## Abstract As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Towards effective execution strategy, recent years have witnessed the shift from the analytical view with model-based market assumptions to model-free perspective, i.e., reinforcement learning, due to its nature of sequential decision optimization. However, the noisy and yet imperfect market information that can be leveraged by the policy has made it quite challenging to build up sample efficient reinforcement learning methods to achieve effective order execution. In this paper, we propose a novel universal trading policy optimization framework to bridge the gap between the noisy yet imperfect market states and the optimal action sequences for order execution. Particularly, this framework leverages a policy distillation method that can better guide the learning of the common policy towards practically optimal execution by an oracle teacher with perfect information to approximate the optimal trading strategy. The extensive experiments have shown significant improvements of our method over various strong baselines, with reasonable trading actions. diff --git a/examples/trade/model/__init__.py b/examples/trade/model/__init__.py deleted file mode 100644 index e5da2c1c0..000000000 --- a/examples/trade/model/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .ppo import * -from .qmodel import * -from .teacher import * -from .util import * -from .opd import * diff --git a/examples/trade/model/opd.py b/examples/trade/model/opd.py deleted file mode 100644 index 6a25d0f35..000000000 --- a/examples/trade/model/opd.py +++ /dev/null @@ -1,74 +0,0 @@ -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F -from copy import deepcopy -import sys - -from tianshou.data import to_torch - - -class OPD_Extractor(nn.Module): - def __init__(self, device="cpu", **kargs): - super().__init__() - self.device = device - hidden_size = kargs["hidden_size"] - fc_size = kargs["fc_size"] - self.cnn_shape = kargs["cnn_shape"] - - self.rnn = nn.GRU(64, hidden_size, batch_first=True) - self.rnn2 = nn.GRU(64, hidden_size, batch_first=True) - self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),) - self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),) - self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),) - - self.fc = nn.Sequential( - nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(), - ) - - def forward(self, inp): - inp = to_torch(inp, dtype=torch.float32, device=self.device) - teacher_action = inp[:, 0] - inp = inp[:, 1:] - seq_len = inp[:, -1].to(torch.long) - batch_size = inp.shape[0] - raw_in = inp[:, : 6 * 240] - raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1) - raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2) - dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2) - cnn_out = self.cnn(raw_in).view(batch_size, 9, -1) - rnn_in = self.raw_fc(cnn_out) - rnn2_in = self.dnn(dnn_in) - rnn2_out = self.rnn2(rnn2_in)[0] - rnn_out = self.rnn(rnn_in)[0] - rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len] - rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len] - # dnn_out = self.dnn(dnn_in) - fc_in = torch.cat((rnn_out, rnn2_out), dim=-1) - feature = self.fc(fc_in) - return feature, teacher_action / 2 - - -class OPD_Actor(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1)) - self.device = device - - def forward(self, obs, state=None, info={}): - feature, self.teacher_action = self.extractor(obs) - out = self.layer_out(feature) - return out, state - - -class OPD_Critic(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.value_out = nn.Linear(32, 1) - self.device = device - - def forward(self, obs, state=None, info={}): - feature, self.teacher_action = self.extractor(obs) - return self.value_out(feature).squeeze(dim=-1) diff --git a/examples/trade/model/ppo.py b/examples/trade/model/ppo.py deleted file mode 100644 index 4c2e9bcf1..000000000 --- a/examples/trade/model/ppo.py +++ /dev/null @@ -1,79 +0,0 @@ -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F -from copy import deepcopy -import sys - -from tianshou.data import to_torch - - -class PPO_Extractor(nn.Module): - def __init__(self, device="cpu", **kargs): - super().__init__() - self.device = device - hidden_size = kargs["hidden_size"] - fc_size = kargs["fc_size"] - self.cnn_shape = kargs["cnn_shape"] - - self.rnn = nn.GRU(64, hidden_size, batch_first=True) - self.rnn2 = nn.GRU(64, hidden_size, batch_first=True) - self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),) - self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),) - self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),) - - self.fc = nn.Sequential( - nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(), - ) - - def forward(self, inp): - inp = to_torch(inp, dtype=torch.float32, device=self.device) - # inp = torch.from_numpy(inp).to(torch.device('cpu')) - seq_len = inp[:, -1].to(torch.long) - batch_size = inp.shape[0] - raw_in = inp[:, : 6 * 240] - raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1) - raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2) - dnn_in = inp[:, -19:-1].reshape(batch_size, -1, 2) - cnn_out = self.cnn(raw_in).view(batch_size, 9, -1) - assert not torch.isnan(cnn_out).any() - rnn_in = self.raw_fc(cnn_out) - assert not torch.isnan(rnn_in).any() - rnn2_in = self.dnn(dnn_in) - assert not torch.isnan(rnn2_in).any() - rnn2_out = self.rnn2(rnn2_in)[0] - assert not torch.isnan(rnn2_out).any() - rnn_out = self.rnn(rnn_in)[0] - assert not torch.isnan(rnn_out).any() - rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len] - rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len] - # dnn_out = self.dnn(dnn_in) - fc_in = torch.cat((rnn_out, rnn2_out), dim=-1) - self.feature = self.fc(fc_in) - return self.feature - - -class PPO_Actor(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1)) - self.device = device - - def forward(self, obs, state=None, info={}): - self.feature = self.extractor(obs) - assert not (torch.isnan(self.feature).any() | torch.isinf(self.feature).any()), f"{self.feature}" - out = self.layer_out(self.feature) - return out, state - - -class PPO_Critic(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.value_out = nn.Linear(32, 1) - self.device = device - - def forward(self, obs, state=None, info={}): - self.feature = self.extractor(obs) - return self.value_out(self.feature).squeeze(dim=-1) diff --git a/examples/trade/model/qmodel.py b/examples/trade/model/qmodel.py deleted file mode 100644 index 361ad40d4..000000000 --- a/examples/trade/model/qmodel.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F -from copy import deepcopy -import sys - -from tianshou.data import to_torch - - -class RNNQModel(nn.Module): - def __init__(self, device="cpu", out_shape=10, **kargs): - super().__init__() - self.device = device - hidden_size = kargs["hidden_size"] - fc_size = kargs["fc_size"] - self.cnn_shape = kargs["cnn_shape"] - - self.rnn = nn.GRU(64, hidden_size, batch_first=True) - self.rnn2 = nn.GRU(64, hidden_size, batch_first=True) - self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),) - self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),) - self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),) - - self.fc = nn.Sequential( - nn.Linear(hidden_size * 2, hidden_size), - nn.ReLU(), - nn.Linear(hidden_size, 32), - nn.ReLU(), - nn.Linear(32, out_shape), - ) - - def forward(self, obs, state=None, info={}): - inp = to_torch(obs, dtype=torch.float32, device=self.device) - inp = inp[:, 182:] - seq_len = inp[:, -1].to(torch.long) - batch_size = inp.shape[0] - raw_in = inp[:, : 6 * 240] - raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1) - raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2) - dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2) - cnn_out = self.cnn(raw_in).view(batch_size, 9, -1) - rnn_in = self.raw_fc(cnn_out) - rnn2_in = self.dnn(dnn_in) - rnn2_out = self.rnn2(rnn2_in)[0] - rnn_out = self.rnn(rnn_in)[0] - rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len] - rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len] - # dnn_out = self.dnn(dnn_in) - fc_in = torch.cat((rnn_out, rnn2_out), dim=-1) - out = self.fc(fc_in) - return out, state diff --git a/examples/trade/model/teacher.py b/examples/trade/model/teacher.py deleted file mode 100644 index b5e8d3f76..000000000 --- a/examples/trade/model/teacher.py +++ /dev/null @@ -1,70 +0,0 @@ -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F -from copy import deepcopy -import sys - -from tianshou.data import to_torch - - -class Teacher_Extractor(nn.Module): - def __init__(self, device="cpu", feature_size=180, **kargs): - super().__init__() - self.device = device - hidden_size = kargs["hidden_size"] - fc_size = kargs["fc_size"] - self.cnn_shape = kargs["cnn_shape"] - - self.rnn = nn.GRU(64, hidden_size, batch_first=True) - self.rnn2 = nn.GRU(64, hidden_size, batch_first=True) - self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),) - self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),) - self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),) - - self.fc = nn.Sequential( - nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(), - ) - - def forward(self, inp): - inp = to_torch(inp, dtype=torch.float32, device=self.device) - inp = inp[:, 182:] - seq_len = inp[:, -1].to(torch.long) - batch_size = inp.shape[0] - raw_in = inp[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2) - dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2) - cnn_out = self.cnn(raw_in).view(batch_size, 8, -1) - rnn_in = self.raw_fc(cnn_out) - rnn2_in = self.dnn(dnn_in) - rnn2_out = self.rnn2(rnn2_in)[0] - rnn_out = self.rnn(rnn_in)[0][:, -1, :] - rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len] - # dnn_out = self.dnn(dnn_in) - fc_in = torch.cat((rnn_out, rnn2_out), dim=-1) - self.feature = self.fc(fc_in) - return self.feature - - -class Teacher_Actor(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1)) - self.device = device - - def forward(self, obs, state=None, info={}): - self.feature = self.extractor(obs) - out = self.layer_out(self.feature) - return out, state - - -class Teacher_Critic(nn.Module): - def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs): - super().__init__() - self.extractor = extractor - self.value_out = nn.Linear(32, 1) - self.device = device - - def forward(self, obs, state=None, info={}): - self.feature = self.extractor(obs) - return self.value_out(self.feature).squeeze(-1) diff --git a/examples/trade/model/util.py b/examples/trade/model/util.py deleted file mode 100644 index 4b685ffd3..000000000 --- a/examples/trade/model/util.py +++ /dev/null @@ -1,191 +0,0 @@ -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F -from copy import deepcopy -import sys - -from tianshou.data import to_torch - - -class Attention(nn.Module): - def __init__(self, in_dim, out_dim): - super().__init__() - self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1)) - - self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),) - - def forward(self, value, key): - key = key.unsqueeze(dim=1) - length = value.shape[1] - key = key.repeat([1, length, 1]) - weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze() # B * l - weight = weight.softmax(dim=-1).unsqueeze(dim=-1) # B * l * 1 - out = (value * weight).sum(dim=1) - out = self.fc(out) - return out - - -class MaskAttention(nn.Module): - def __init__(self, in_dim, out_dim): - super().__init__() - self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1)) - - self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),) - - def forward(self, value, key, seq_len, maxlen=9): - # seq_len: (batch,) - device = value.device - key = key.unsqueeze(dim=1) - length = value.shape[1] - key = key.repeat([1, length, 1]) # (batch, 9, 64) - weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze(-1) # (batch, 9) - mask = sequence_mask(seq_len + 1, maxlen=maxlen, device=device) - weight[~mask] = float("-inf") - weight = weight.softmax(dim=-1).unsqueeze(dim=-1) - out = (value * weight).sum(dim=1) - out = self.fc(out) - return out - - -class TFMaskAttention(nn.Module): - def __init__(self, in_dim, out_dim): - super().__init__() - self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1)) - - self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),) - - def forward(self, value, key, seq_len, maxlen=9): - device = value.device - key = key.unsqueeze(dim=1) - length = value.shape[1] - key = key.repeat([1, length, 1]) - weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze(-1) - mask = sequence_mask(seq_len + 1, maxlen=maxlen, device=device) - mask = mask.repeat(1, 3) # (batch, 9*3) - weight[~mask] = float("-inf") - weight = weight.softmax(dim=-1).unsqueeze(dim=-1) - out = (value * weight).sum(dim=1) - out = self.fc(out) - return out - - -class NNAttention(nn.Module): - def __init__(self, in_dim, out_dim): - super().__init__() - self.q_net = nn.Linear(in_dim, out_dim) - self.k_net = nn.Linear(in_dim, out_dim) - self.v_net = nn.Linear(in_dim, out_dim) - - def forward(self, Q, K, V): - q = self.q_net(Q) - k = self.k_net(K) - v = self.v_net(V) - - attn = torch.einsum("ijk,ilk->ijl", q, k) - attn = attn.to(Q.device) - attn_prob = torch.softmax(attn, dim=-1) - - attn_vec = torch.einsum("ijk,ikl->ijl", attn_prob, v) - - return attn_vec - - -class Reshape(nn.Module): - def __init__(self, *args): - super(Reshape, self).__init__() - self.shape = args - - def forward(self, x): - return x.view(self.shape) - - -class DARNN(nn.Module): - def __init__(self, device="cpu", **kargs): - super().__init__() - self.emb_dim = kargs["emb_dim"] - self.hidden_size = kargs["hidden_size"] - self.num_layers = kargs["num_layers"] - self.is_bidir = kargs["is_bidir"] - self.dropout = kargs["dropout"] - self.seq_len = kargs["seq_len"] - self.interval = kargs["interval"] - self.today_length = 238 - self.prev_length = 240 - self.input_length = 480 - self.input_size = 6 - - self.rnn = nn.LSTM( - input_size=self.input_size + self.emb_dim, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - batch_first=True, - bidirectional=self.is_bidir, - dropout=self.dropout, - ) - self.prev_rnn = nn.LSTM( - input_size=self.input_size, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - batch_first=True, - bidirectional=self.is_bidir, - dropout=self.dropout, - ) - self.fc_out = nn.Linear(in_features=self.hidden_size * 2, out_features=1) - self.attention = NNAttention(self.hidden_size, self.hidden_size) - self.act_out = nn.Sigmoid() - if self.emb_dim != 0: - self.pos_emb = nn.Embedding(self.input_length, self.emb_dim) - - def forward(self, inputs): - inputs = inputs.view(-1, self.input_length, self.input_size) # [B, T, F] - today_input = inputs[:, : self.today_length, :] - today_input = torch.cat((torch.zeros_like(today_input[:, :1, :]), today_input), dim=1) - prev_input = inputs[:, 240 : 240 + self.prev_length, :] - if self.emb_dim != 0: - embedding = self.pos_emb(torch.arange(end=self.today_length + 1, device=inputs.device)) - embedding = embedding.repeat([today_input.size()[0], 1, 1]) - today_input = torch.cat((today_input, embedding), dim=-1) - prev_outs, _ = self.prev_rnn(prev_input) - today_outs, _ = self.rnn(today_input) - - outs = self.attention(today_outs, prev_outs, prev_outs) - outs = torch.cat((today_outs, outs), dim=-1) - outs = outs[:, range(0, self.seq_len * self.interval, self.interval), :] - # outs = self.fc_out(outs).squeeze() - return self.act_out(self.fc_out(outs).squeeze(-1)), outs - - -class Transpose(nn.Module): - def __init__(self, dim1=0, dim2=1): - super().__init__() - self.dim1 = dim1 - self.dim2 = dim2 - - def forward(self, x): - return x.transpose(self.dim1, self.dim2) - - -class SelfAttention(nn.Module): - def __init__(self, *args, **kargs): - super().__init__() - self.attention = nn.MultiheadAttention(*args, **kargs) - - def forward(self, x): - return self.attention(x, x, x)[0] - - -def onehot_enc(y, len): - y = y.unsqueeze(-1) - y_onehot = torch.zeros(y.shape[0], len) - # y_onehot.zero_() - y_onehot.scatter(1, y, 1) - return y_onehot - - -def sequence_mask(lengths, maxlen=None, dtype=torch.bool, device=None): - if maxlen is None: - maxlen = lengths.max() - mask = ~(torch.ones((len(lengths), maxlen), device=device).cumsum(dim=1).t() > lengths).t() - mask.type(dtype) - return mask diff --git a/examples/trade/network/teacher.py b/examples/trade/network/teacher.py index b5e8d3f76..395853f13 100644 --- a/examples/trade/network/teacher.py +++ b/examples/trade/network/teacher.py @@ -28,11 +28,10 @@ class Teacher_Extractor(nn.Module): def forward(self, inp): inp = to_torch(inp, dtype=torch.float32, device=self.device) - inp = inp[:, 182:] seq_len = inp[:, -1].to(torch.long) batch_size = inp.shape[0] - raw_in = inp[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2) - dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2) + raw_in = inp[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2) ## public part of state + dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2) ## private part of state cnn_out = self.cnn(raw_in).view(batch_size, 8, -1) rnn_in = self.raw_fc(cnn_out) rnn2_in = self.dnn(dnn_in) diff --git a/examples/trade/order_gen.py b/examples/trade/order_gen.py index 9a7c49ef8..71499523f 100644 --- a/examples/trade/order_gen.py +++ b/examples/trade/order_gen.py @@ -26,6 +26,7 @@ def w_order(f, start, end): df = pd.read_pickle(in_dir + f) #df['date'] = df.index.get_level_values(1).map(lambda x: x.date()) #df = df.set_index('date', append=True, drop=True) + order = generate_order(df, start, end) order_train = order[order.index.get_level_values(0) < '2020-12-01'] order_test = order[order.index.get_level_values(0) >= '2020-12-01'] @@ -50,6 +51,7 @@ def w_order(f, start, end): all_path = os.path.join(data_path, "order/all/") if not os.path.exists(all_path): os.makedirs(all_path) + order.to_pickle(all_path + f[:-9] + '.target') return 0 diff --git a/examples/trade/teacher_feature.py b/examples/trade/teacher_feature.py index 62e69ceba..9c6ed867a 100644 --- a/examples/trade/teacher_feature.py +++ b/examples/trade/teacher_feature.py @@ -6,12 +6,15 @@ feature_path = os.path.join(data_path, 'feature/teacher/') if not os.path.exists(feature_path): os.makedirs(feature_path) + log_file = os.path.join(os.environ.get('OUTPUT_DIR'),'example/OPDT_b/test/') + files = os.listdir(log_file) for f in files: if f.endswith(".log"): df = pd.read_pickle(log_file + f) + #df['datetime'] = df.index.get_level_values(1).map(lambda x: x[1]) df['datetime'] = df.index.get_level_values(1) df.set_index('datetime', append=True, drop=True, inplace=True)