1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-07-01 10:01:19 +08:00

Merge pull request #227 from Arthur-Null/high-freq-execution

High freq execution
This commit is contained in:
you-n-g
2021-01-28 16:27:05 +08:00
committed by GitHub
10 changed files with 524 additions and 15 deletions

16
examples/trade/README.md Normal file
View File

@@ -0,0 +1,16 @@
# Universal Trading for Order Execution with Oracle Policy Distillation
This is the experiment code for our AAAI 2021 paper "[Universal Trading for Order Execution with Oracle Policy Distillation](https://seqml.github.io/opd/opd_aaai21.pdf)", including the implementations of all the compared methods in the paper and a general reinforcement learning framework for order execution in quantitative finance.
## Abstract
As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Towards effective execution strategy, recent years have witnessed the shift from the analytical view with model-based market assumptions to model-free perspective, i.e., reinforcement learning, due to its nature of sequential decision optimization. However, the noisy and yet imperfect market information that can be leveraged by the policy has made it quite challenging to build up sample efficient reinforcement learning methods to achieve effective order execution. In this paper, we propose a novel universal trading policy optimization framework to bridge the gap between the noisy yet imperfect market states and the optimal action sequences for order execution. Particularly, this framework leverages a policy distillation method that can better guide the learning of the common policy towards practically optimal execution by an oracle teacher with perfect information to approximate the optimal trading strategy. The extensive experiments have shown significant improvements of our method over various strong baselines, with reasonable trading actions.
### Citation
You are more than welcome to cite our paper:
```
@inproceedings{fang2021universal,
title={Universal Trading for Order Execution with Oracle Policy Distillation},
author={Fang, Yuchen and Ren, Kan and Liu, Weiqing and Zhou, Dong and Zhang, Weinan and Bian, Jiang and Yu, Yong and Liu, Tie-Yan},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
year={2021}
}
```

View File

@@ -1,5 +1,3 @@
from joblib import Parallel, delayed
from numba import njit, prange
from tianshou.policy import BasePolicy
from tianshou.data import Batch
import numpy as np

View File

@@ -5,7 +5,7 @@ import logger
import json
import os
import agent
import model
import network
import policy
import random
import tianshou as ts
@@ -48,7 +48,15 @@ def setup_seed(seed):
class BaseExecutor(object):
def __init__(
self, log_dir, resources, env_conf, optim=None, policy_conf=None, network=None, policy_path=None, seed=None,
self,
log_dir,
resources,
env_conf,
optim=None,
policy_conf=None,
network_conf=None,
policy_path=None,
seed=None,
):
"""A base class for executor
@@ -62,8 +70,8 @@ class BaseExecutor(object):
:type optim: dict, optional
:param policy_conf: Configurations for the RL algorithm, defaults to None
:type policy_conf: dict, optional
:param network: Configurations for policy network, defaults to None
:type network: dict, optional
:param network_conf: Configurations for policy network_conf, defaults to None
:type network_conf: dict, optional
:param policy_path: If is not None, would load the policy from this path, defaults to None
:type policy_path: string, optional
:param seed: Random seed, defaults to None
@@ -90,17 +98,23 @@ class BaseExecutor(object):
self.policy = getattr(agent, policy_conf["name"])(policy_conf["config"])
# print(self.policy)
else:
assert not network is None
if "extractor" in network.keys():
net = getattr(model, network["extractor"]["name"] + "_Extractor")(
device=self.device, **network["config"]
assert not network_conf is None
if "extractor" in network_conf.keys():
net = getattr(network, network_conf["extractor"]["name"] + "_Extractor")(
device=self.device, **network_conf["config"]
)
else:
net = getattr(model, network["name"] + "_Extractor")(device=self.device, **network["config"])
net = getattr(network, network_conf["name"] + "_Extractor")(
device=self.device, **network_conf["config"]
)
net.to(self.device)
actor = getattr(model, network["name"] + "_Actor")(extractor=net, device=self.device, **network["config"])
actor = getattr(network, network_conf["name"] + "_Actor")(
extractor=net, device=self.device, **network_conf["config"]
)
actor.to(self.device)
critic = getattr(model, network["name"] + "_Critic")(extractor=net, device=self.device, **network["config"])
critic = getattr(network, network_conf["name"] + "_Critic")(
extractor=net, device=self.device, **network_conf["config"]
)
critic.to(self.device)
self.optim = torch.optim.Adam(
list(actor.parameters()) + list(critic.parameters()),
@@ -180,7 +194,7 @@ class Executor(BaseExecutor):
io_conf,
optim=None,
policy_conf=None,
network=None,
network_conf=None,
policy_path=None,
seed=None,
share_memory=False,
@@ -210,7 +224,7 @@ class Executor(BaseExecutor):
:param buffer_size: The size of replay buffer, defaults to 200000
:type buffer_size: int, optional
"""
super().__init__(log_dir, resources, env_conf, optim, policy_conf, network, policy_path, seed)
super().__init__(log_dir, resources, env_conf, optim, policy_conf, network_conf, policy_path, seed)
single_env = getattr(env, env_conf["name"])
env_conf = merge_dicts(env_conf, train_paths)
env_conf["log"] = True

View File

@@ -0,0 +1,5 @@
from .ppo import *
from .qmodel import *
from .teacher import *
from .util import *
from .opd import *

View File

@@ -0,0 +1,74 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class OPD_Extractor(nn.Module):
def __init__(self, device="cpu", **kargs):
super().__init__()
self.device = device
hidden_size = kargs["hidden_size"]
fc_size = kargs["fc_size"]
self.cnn_shape = kargs["cnn_shape"]
self.rnn = nn.GRU(64, hidden_size, batch_first=True)
self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),)
self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),)
self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),)
self.fc = nn.Sequential(
nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(),
)
def forward(self, inp):
inp = to_torch(inp, dtype=torch.float32, device=self.device)
teacher_action = inp[:, 0]
inp = inp[:, 1:]
seq_len = inp[:, -1].to(torch.long)
batch_size = inp.shape[0]
raw_in = inp[:, : 6 * 240]
raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1)
raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2)
dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2)
cnn_out = self.cnn(raw_in).view(batch_size, 9, -1)
rnn_in = self.raw_fc(cnn_out)
rnn2_in = self.dnn(dnn_in)
rnn2_out = self.rnn2(rnn2_in)[0]
rnn_out = self.rnn(rnn_in)[0]
rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len]
rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len]
# dnn_out = self.dnn(dnn_in)
fc_in = torch.cat((rnn_out, rnn2_out), dim=-1)
feature = self.fc(fc_in)
return feature, teacher_action / 2
class OPD_Actor(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
self.device = device
def forward(self, obs, state=None, info={}):
feature, self.teacher_action = self.extractor(obs)
out = self.layer_out(feature)
return out, state
class OPD_Critic(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.value_out = nn.Linear(32, 1)
self.device = device
def forward(self, obs, state=None, info={}):
feature, self.teacher_action = self.extractor(obs)
return self.value_out(feature).squeeze(dim=-1)

View File

@@ -0,0 +1,79 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class PPO_Extractor(nn.Module):
def __init__(self, device="cpu", **kargs):
super().__init__()
self.device = device
hidden_size = kargs["hidden_size"]
fc_size = kargs["fc_size"]
self.cnn_shape = kargs["cnn_shape"]
self.rnn = nn.GRU(64, hidden_size, batch_first=True)
self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),)
self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),)
self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),)
self.fc = nn.Sequential(
nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(),
)
def forward(self, inp):
inp = to_torch(inp, dtype=torch.float32, device=self.device)
# inp = torch.from_numpy(inp).to(torch.device('cpu'))
seq_len = inp[:, -1].to(torch.long)
batch_size = inp.shape[0]
raw_in = inp[:, : 6 * 240]
raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1)
raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2)
dnn_in = inp[:, -19:-1].reshape(batch_size, -1, 2)
cnn_out = self.cnn(raw_in).view(batch_size, 9, -1)
assert not torch.isnan(cnn_out).any()
rnn_in = self.raw_fc(cnn_out)
assert not torch.isnan(rnn_in).any()
rnn2_in = self.dnn(dnn_in)
assert not torch.isnan(rnn2_in).any()
rnn2_out = self.rnn2(rnn2_in)[0]
assert not torch.isnan(rnn2_out).any()
rnn_out = self.rnn(rnn_in)[0]
assert not torch.isnan(rnn_out).any()
rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len]
rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len]
# dnn_out = self.dnn(dnn_in)
fc_in = torch.cat((rnn_out, rnn2_out), dim=-1)
self.feature = self.fc(fc_in)
return self.feature
class PPO_Actor(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
self.device = device
def forward(self, obs, state=None, info={}):
self.feature = self.extractor(obs)
assert not (torch.isnan(self.feature).any() | torch.isinf(self.feature).any()), f"{self.feature}"
out = self.layer_out(self.feature)
return out, state
class PPO_Critic(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.value_out = nn.Linear(32, 1)
self.device = device
def forward(self, obs, state=None, info={}):
self.feature = self.extractor(obs)
return self.value_out(self.feature).squeeze(dim=-1)

View File

@@ -0,0 +1,52 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class RNNQModel(nn.Module):
def __init__(self, device="cpu", out_shape=10, **kargs):
super().__init__()
self.device = device
hidden_size = kargs["hidden_size"]
fc_size = kargs["fc_size"]
self.cnn_shape = kargs["cnn_shape"]
self.rnn = nn.GRU(64, hidden_size, batch_first=True)
self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),)
self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),)
self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),)
self.fc = nn.Sequential(
nn.Linear(hidden_size * 2, hidden_size),
nn.ReLU(),
nn.Linear(hidden_size, 32),
nn.ReLU(),
nn.Linear(32, out_shape),
)
def forward(self, obs, state=None, info={}):
inp = to_torch(obs, dtype=torch.float32, device=self.device)
inp = inp[:, 182:]
seq_len = inp[:, -1].to(torch.long)
batch_size = inp.shape[0]
raw_in = inp[:, : 6 * 240]
raw_in = torch.cat((torch.zeros_like(inp[:, : 6 * 30]), raw_in), dim=-1)
raw_in = raw_in.reshape(-1, 30, 6).transpose(1, 2)
dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2)
cnn_out = self.cnn(raw_in).view(batch_size, 9, -1)
rnn_in = self.raw_fc(cnn_out)
rnn2_in = self.dnn(dnn_in)
rnn2_out = self.rnn2(rnn2_in)[0]
rnn_out = self.rnn(rnn_in)[0]
rnn_out = rnn_out[torch.arange(rnn_out.size(0)), seq_len]
rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len]
# dnn_out = self.dnn(dnn_in)
fc_in = torch.cat((rnn_out, rnn2_out), dim=-1)
out = self.fc(fc_in)
return out, state

View File

@@ -0,0 +1,70 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class Teacher_Extractor(nn.Module):
def __init__(self, device="cpu", feature_size=180, **kargs):
super().__init__()
self.device = device
hidden_size = kargs["hidden_size"]
fc_size = kargs["fc_size"]
self.cnn_shape = kargs["cnn_shape"]
self.rnn = nn.GRU(64, hidden_size, batch_first=True)
self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU(),)
self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU(),)
self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU(),)
self.fc = nn.Sequential(
nn.Linear(hidden_size * 2, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 32), nn.ReLU(),
)
def forward(self, inp):
inp = to_torch(inp, dtype=torch.float32, device=self.device)
inp = inp[:, 182:]
seq_len = inp[:, -1].to(torch.long)
batch_size = inp.shape[0]
raw_in = inp[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2)
dnn_in = inp[:, 6 * 240 : -1].reshape(batch_size, -1, 2)
cnn_out = self.cnn(raw_in).view(batch_size, 8, -1)
rnn_in = self.raw_fc(cnn_out)
rnn2_in = self.dnn(dnn_in)
rnn2_out = self.rnn2(rnn2_in)[0]
rnn_out = self.rnn(rnn_in)[0][:, -1, :]
rnn2_out = rnn2_out[torch.arange(rnn2_out.size(0)), seq_len]
# dnn_out = self.dnn(dnn_in)
fc_in = torch.cat((rnn_out, rnn2_out), dim=-1)
self.feature = self.fc(fc_in)
return self.feature
class Teacher_Actor(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
self.device = device
def forward(self, obs, state=None, info={}):
self.feature = self.extractor(obs)
out = self.layer_out(self.feature)
return out, state
class Teacher_Critic(nn.Module):
def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
super().__init__()
self.extractor = extractor
self.value_out = nn.Linear(32, 1)
self.device = device
def forward(self, obs, state=None, info={}):
self.feature = self.extractor(obs)
return self.value_out(self.feature).squeeze(-1)

View File

@@ -0,0 +1,191 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from copy import deepcopy
import sys
from tianshou.data import to_torch
class Attention(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),)
def forward(self, value, key):
key = key.unsqueeze(dim=1)
length = value.shape[1]
key = key.repeat([1, length, 1])
weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze() # B * l
weight = weight.softmax(dim=-1).unsqueeze(dim=-1) # B * l * 1
out = (value * weight).sum(dim=1)
out = self.fc(out)
return out
class MaskAttention(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),)
def forward(self, value, key, seq_len, maxlen=9):
# seq_len: (batch,)
device = value.device
key = key.unsqueeze(dim=1)
length = value.shape[1]
key = key.repeat([1, length, 1]) # (batch, 9, 64)
weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze(-1) # (batch, 9)
mask = sequence_mask(seq_len + 1, maxlen=maxlen, device=device)
weight[~mask] = float("-inf")
weight = weight.softmax(dim=-1).unsqueeze(dim=-1)
out = (value * weight).sum(dim=1)
out = self.fc(out)
return out
class TFMaskAttention(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.get_w = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.ReLU(), nn.Linear(in_dim, 1))
self.fc = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU(),)
def forward(self, value, key, seq_len, maxlen=9):
device = value.device
key = key.unsqueeze(dim=1)
length = value.shape[1]
key = key.repeat([1, length, 1])
weight = self.get_w(torch.cat((key, value), dim=-1)).squeeze(-1)
mask = sequence_mask(seq_len + 1, maxlen=maxlen, device=device)
mask = mask.repeat(1, 3) # (batch, 9*3)
weight[~mask] = float("-inf")
weight = weight.softmax(dim=-1).unsqueeze(dim=-1)
out = (value * weight).sum(dim=1)
out = self.fc(out)
return out
class NNAttention(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.q_net = nn.Linear(in_dim, out_dim)
self.k_net = nn.Linear(in_dim, out_dim)
self.v_net = nn.Linear(in_dim, out_dim)
def forward(self, Q, K, V):
q = self.q_net(Q)
k = self.k_net(K)
v = self.v_net(V)
attn = torch.einsum("ijk,ilk->ijl", q, k)
attn = attn.to(Q.device)
attn_prob = torch.softmax(attn, dim=-1)
attn_vec = torch.einsum("ijk,ikl->ijl", attn_prob, v)
return attn_vec
class Reshape(nn.Module):
def __init__(self, *args):
super(Reshape, self).__init__()
self.shape = args
def forward(self, x):
return x.view(self.shape)
class DARNN(nn.Module):
def __init__(self, device="cpu", **kargs):
super().__init__()
self.emb_dim = kargs["emb_dim"]
self.hidden_size = kargs["hidden_size"]
self.num_layers = kargs["num_layers"]
self.is_bidir = kargs["is_bidir"]
self.dropout = kargs["dropout"]
self.seq_len = kargs["seq_len"]
self.interval = kargs["interval"]
self.today_length = 238
self.prev_length = 240
self.input_length = 480
self.input_size = 6
self.rnn = nn.LSTM(
input_size=self.input_size + self.emb_dim,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
batch_first=True,
bidirectional=self.is_bidir,
dropout=self.dropout,
)
self.prev_rnn = nn.LSTM(
input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
batch_first=True,
bidirectional=self.is_bidir,
dropout=self.dropout,
)
self.fc_out = nn.Linear(in_features=self.hidden_size * 2, out_features=1)
self.attention = NNAttention(self.hidden_size, self.hidden_size)
self.act_out = nn.Sigmoid()
if self.emb_dim != 0:
self.pos_emb = nn.Embedding(self.input_length, self.emb_dim)
def forward(self, inputs):
inputs = inputs.view(-1, self.input_length, self.input_size) # [B, T, F]
today_input = inputs[:, : self.today_length, :]
today_input = torch.cat((torch.zeros_like(today_input[:, :1, :]), today_input), dim=1)
prev_input = inputs[:, 240 : 240 + self.prev_length, :]
if self.emb_dim != 0:
embedding = self.pos_emb(torch.arange(end=self.today_length + 1, device=inputs.device))
embedding = embedding.repeat([today_input.size()[0], 1, 1])
today_input = torch.cat((today_input, embedding), dim=-1)
prev_outs, _ = self.prev_rnn(prev_input)
today_outs, _ = self.rnn(today_input)
outs = self.attention(today_outs, prev_outs, prev_outs)
outs = torch.cat((today_outs, outs), dim=-1)
outs = outs[:, range(0, self.seq_len * self.interval, self.interval), :]
# outs = self.fc_out(outs).squeeze()
return self.act_out(self.fc_out(outs).squeeze(-1)), outs
class Transpose(nn.Module):
def __init__(self, dim1=0, dim2=1):
super().__init__()
self.dim1 = dim1
self.dim2 = dim2
def forward(self, x):
return x.transpose(self.dim1, self.dim2)
class SelfAttention(nn.Module):
def __init__(self, *args, **kargs):
super().__init__()
self.attention = nn.MultiheadAttention(*args, **kargs)
def forward(self, x):
return self.attention(x, x, x)[0]
def onehot_enc(y, len):
y = y.unsqueeze(-1)
y_onehot = torch.zeros(y.shape[0], len)
# y_onehot.zero_()
y_onehot.scatter(1, y, 1)
return y_onehot
def sequence_mask(lengths, maxlen=None, dtype=torch.bool, device=None):
if maxlen is None:
maxlen = lengths.max()
mask = ~(torch.ones((len(lengths), maxlen), device=device).cumsum(dim=1).t() > lengths).t()
mask.type(dtype)
return mask

View File

@@ -0,0 +1,10 @@
gym==0.17.3
torch==1.6.0
numba==0.51.2
numpy==1.19.1
pandas==1.1.3
tqdm==4.50.2
tianshou==0.3.0.post1
env==0.1.0
PyYAML==5.4.1
redis==3.5.3