From c26bee126bc920654b5ab9526a90314bd835c595 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Fri, 28 May 2021 17:31:08 +0800 Subject: [PATCH 01/28] Support loading for backtest --- examples/multi_level_trading/workflow.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/multi_level_trading/workflow.py b/examples/multi_level_trading/workflow.py index 8096fc76f..2b70d4411 100644 --- a/examples/multi_level_trading/workflow.py +++ b/examples/multi_level_trading/workflow.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from typing import Optional import qlib import fire @@ -124,11 +125,17 @@ class MultiLevelTradingWorkflow: sr = SignalRecord(model, dataset, recorder) sr.generate() - def backtest(self): + def _load_model(self, load): + return R.get_recorder(load, experiment_name="train").load_object("params.pkl") + + def backtest(self, load_model: Optional[str] = None): self._init_qlib() model = init_instance_by_config(self.task["model"]) dataset = init_instance_by_config(self.task["dataset"]) - self._train_model(model, dataset) + if load_model is None: + self._train_model(model, dataset) + else: + model = self._load_model(load_model) strategy_config = { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.model_strategy", From d3dac068df5e21d54bb453bb1b9a3eaacf389a06 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Tue, 1 Jun 2021 11:33:44 +0800 Subject: [PATCH 02/28] Update simple playground --- qlib/strategy/__init__.py | 2 + qlib/strategy/base.py | 2 + rl_playground.py | 137 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 rl_playground.py diff --git a/qlib/strategy/__init__.py b/qlib/strategy/__init__.py index 59e481eb9..e3fcd8e26 100644 --- a/qlib/strategy/__init__.py +++ b/qlib/strategy/__init__.py @@ -1,2 +1,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
+ +from .base import * diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 7828db609..37897da5a 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -7,6 +7,8 @@ from ..data.dataset.utils import convert_index_format from ..rl.interpreter import ActionInterpreter, StateInterpreter from ..utils import init_instance_by_config +__all__ = ['BaseStrategy', 'ModelStrategy', 'RLStrategy', 'RLIntStrategy'] + class BaseStrategy: """Base strategy for trading""" diff --git a/rl_playground.py b/rl_playground.py new file mode 100644 index 000000000..3a4291495 --- /dev/null +++ b/rl_playground.py @@ -0,0 +1,137 @@ +import logging +import pickle +from enum import Enum +from typing import Iterable, Optional, Any + +import gym +import numpy as np + +import torch +from torch.utils.data import Dataset + +from qlib.backtest import get_exchange, Account, BaseExecutor +from qlib.rl.interpreter import StateInterpreter, ActionInterpreter +from qlib.utils import init_instance_by_config + + +def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}): + trade_account = Account( + init_cash=account, + benchmark_config={ + "benchmark": benchmark, + "start_time": start_time, + "end_time": end_time, + }, + ) + trade_exchange = get_exchange(**exchange_kwargs) + + common_infra = { + "trade_account": trade_account, + "trade_exchange": trade_exchange, + } + + trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra) + + return common_infra, trade_executor + + +class QlibOrderDataset(Dataset): + def __init__(self, order_file): + with open(order_file, 'rb') as f: + self.orders = pickle.load(f) + + def __len__(self): + return len(self.orders) + + def __getitem__(self, index): + return self.orders[index] + + +class OrderEnv(gym.Env): + def __init__(self, + state_interpreter: StateInterpreter, + action_interpreter: ActionInterpreter, + reward: Any, + dataloader: Iterable, + executor: 
BaseExecutor): + self.action_interpreter = action_interpreter + self.state_interpreter = state_interpreter + self.reward = reward + self.dataloader = dataloader + self.executor = executor + + @property + def action_space(self): + return self.action.action_space + + @property + def observation_space(self): + return self.observation.observation_space + + def reset(self): + try: + self.cur_order = next(self.dataloader) + except StopIteration: + self.dataloader = None + return None + + self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) + self.level_infra = self.executor.get_level_infra() + self.execute_result = [] + + # TODO: how to fetch data after feature engineering? + + # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass + return self.state_interpreter(self.cur_order, self.level_infra) + + def step(self, action): + assert self.dataloader is not None + + assert not self.executor.finished() + + trade_decision = self.action_interpreter(action) + self.execute_result.extend(self.executor.execute(trade_decision)) + reward, rew_info = self.reward() + + done = self.executor.finished() + info = { + 'action_history': self.action_history, + 'category': self.ep_state.flow_dir.value, + 'reward': rew_info + } + if self.ep_state.done: + info['logs'] = self.ep_state.logs() + info['index'] = { + 'ins': self._sample.ins, + 'date': self._sample.date + } + + # TODO: how to collect metrics + return self.state_interpreter(self.cur_order, self.level_infra), reward, done, info + + +def _main(): + executor_config = { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": "day", + "verbose": True, + "generate_report": True, + } + } + # TODO: why is there a benchmark? 
+ trade_start_time = "2017-01-01" + trade_end_time = "2020-08-01" + benchmark = "SH000300" + executor = get_executor( + trade_start_time, trade_end_time, executor_config, + benchmark, 1000000000, exchange_kwargs={ + "freq": "day", + "limit_threshold": 0.095, + "deal_price": "close", + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5, + } + ) From 449e3f40c88ea6acb9ad8884c2a52515bf54b5af Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Tue, 1 Jun 2021 17:51:29 +0800 Subject: [PATCH 03/28] Update init in backtest --- qlib/backtest/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index 33c2cb2d8..1d9e91bb3 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -6,6 +6,7 @@ from .exchange import Exchange from .executor import BaseExecutor from .backtest import backtest as backtest_func from .backtest import collect_data as data_generator +from .order import Order from .utils import CommonInfrastructure from ..strategy.base import BaseStrategy From 83535bff6af1e6b288f9d00110424b547afd55a5 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Tue, 1 Jun 2021 18:08:11 +0800 Subject: [PATCH 04/28] Playground checkpoint --- rl_playground.py | 307 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 263 insertions(+), 44 deletions(-) diff --git a/rl_playground.py b/rl_playground.py index 3a4291495..de1fb15dd 100644 --- a/rl_playground.py +++ b/rl_playground.py @@ -1,17 +1,20 @@ -import logging import pickle -from enum import Enum -from typing import Iterable, Optional, Any +from dataclasses import dataclass +from typing import Iterable, Any -import gym import numpy as np - -import torch -from torch.utils.data import Dataset - -from qlib.backtest import get_exchange, Account, BaseExecutor +import gym +import qlib +from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order +from qlib.config import REG_CN +from qlib.data import D from 
qlib.rl.interpreter import StateInterpreter, ActionInterpreter -from qlib.utils import init_instance_by_config +from qlib.tests.data import GetData +from qlib.utils import init_instance_by_config, exists_qlib_data +from torch.utils.data import Dataset, DataLoader +from tianshou.data import Batch, Collector +from tianshou.env import DummyVectorEnv +from tianshou.policy import BasePolicy def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}): @@ -25,14 +28,10 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1 ) trade_exchange = get_exchange(**exchange_kwargs) - common_infra = { - "trade_account": trade_account, - "trade_exchange": trade_exchange, - } - + common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange) trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra) - return common_infra, trade_executor + return trade_executor class QlibOrderDataset(Dataset): @@ -47,19 +46,180 @@ class QlibOrderDataset(Dataset): return self.orders[index] -class OrderEnv(gym.Env): +class DummyCallable: + def __call__(self, *args, **kwargs): + if args: + return args[0] + if kwargs: + for v in kwargs.values(): + return v + + +class DummyPolicy(BasePolicy): + def forward(self, batch, state=None, **kwargs): + return Batch(act=0) + + def learn(self, *args, **kwargs): + pass + + +@dataclass +class EpisodicState: + """ + A simplified data structure for RL-related components to process observations and rewards + """ + # requirements + start_time: int + end_time: int + num_step: int + time_per_step: int + target: float + target_limit: float + vol_limit: Optional[float] + flow_dir: int + market_price: np.ndarray + market_vol: np.ndarray + + # agent state + cur_time: int = -1 + cur_step: int = 0 + done: bool = False + position: Optional[float] = None + exec_vol: Optional[np.ndarray] = None + last_step_duration: Optional[int] = 
None + position_history: Optional[np.ndarray] = None + + # calculated statistics + turnover: Optional[float] = None + baseline_twap: Optional[float] = None + baseline_vwap: Optional[float] = None + exec_avg_price: Optional[float] = None + pa_twap: Optional[float] = None + pa_vwap: Optional[float] = None + fulfill_rate: Optional[float] = None + + def __post_init__(self): + assert self.target >= 0 + self.cur_time = self.start_time + self.position = self.target + self.position_history = np.full((self.num_step + 1), np.nan) + self.position_history[0] = self.position + self.baseline_twap = np.mean(self.market_price) + if self.market_vol.sum() == 0: + self.baseline_vwap = np.mean(self.market_price) + else: + self.baseline_vwap = np.average(self.market_price, weights=self.market_vol) + + def update_stats(self): + market_price = self.market_price[:len(self.exec_vol)] + self.turnover = (self.exec_vol * market_price).sum() + # exec_vol can be zero + if np.isclose(self.exec_vol.sum(), 0): + self.exec_avg_price = market_price[0] + else: + self.exec_avg_price = np.average(market_price, weights=self.exec_vol) + self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.flow_dir) + self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.flow_dir) + self.fulfill_rate = (self.target - self.position) / self.target_limit + if abs(self.fulfill_rate - 1.0) < EPSILON: + self.fulfill_rate = 1.0 + self.fulfill_rate *= 100 + + def logs(self): + logs = { + 'stop_time': self.cur_time - self.start_time, + 'stop_step': self.cur_step, + 'turnover': self.turnover, + 'baseline_twap': self.baseline_twap, + 'baseline_vwap': self.baseline_vwap, + 'exec_avg_price': self.exec_avg_price, + 'pa_twap': self.pa_twap, + 'pa_vwap': self.pa_vwap, + 'ffr': self.fulfill_rate + } + return logs + + def next_duration(self) -> int: + return min(self.time_per_step, self.end_time - self.cur_time) + + def step(self, exec_vol): + self.last_step_duration = len(exec_vol) + 
self.position -= exec_vol.sum() + assert self.position > -EPSILON and (exec_vol > -EPSILON).all(), \ + f'Execution volume is invalid: {exec_vol} (position = {self.position})' + self.position_history[self.cur_step + 1] = self.position + self.cur_time += self.last_step_duration + self.cur_step += 1 + if self.cur_step == self.num_step: + assert self.cur_time == self.end_time + if self.exec_vol is None: + self.exec_vol = exec_vol + else: + self.exec_vol = np.concatenate((self.exec_vol, exec_vol)) + + self.done = self.position < EPSILON or self.cur_step == self.num_step + if self.done: + self.update_stats() + + l, r = self.cur_time - self.last_step_duration - self.start_time, self.cur_time - self.start_time + assert 0 <= l < r + return StepState(self.exec_vol[l:r], self.market_vol[l:r], self.market_price[l:r], self) + + +@dataclass +class StepState: + exec_vol: np.ndarray + market_vol: np.ndarray + market_price: np.ndarray + + # episode info + episode_state: EpisodicState + + # calculated statistics + turnover: Optional[float] = None + exec_avg_price: Optional[float] = None + pa_twap: Optional[float] = None + pa_vwap: Optional[float] = None + + def __post_init__(self): + assert len(self.exec_vol) == len(self.market_price) == len(self.market_vol) + self.turnover = (self.exec_vol * self.market_price).sum() + if np.isclose(self.market_vol.sum(), 0): + self.exec_avg_price = self.market_price[0] + else: + self.exec_avg_price = np.average(self.market_price, weights=self.market_vol) + self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap, + self.episode_state.flow_dir) + self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap, + self.episode_state.flow_dir) + + +def price_advantage(exec_price: float, baseline_price: float, flow: FlowDirection) -> float: + if baseline_price == 0: + return 0. 
+ if flow == FlowDirection.ACQUIRE: + return (1 - exec_price / baseline_price) * 10000 + else: + return (exec_price / baseline_price - 1) * 10000 + + + +class SingleOrderEnv(gym.Env): + MAX_STEPS = 10 def __init__(self, - state_interpreter: StateInterpreter, - action_interpreter: ActionInterpreter, + observation: StateInterpreter, + action: ActionInterpreter, reward: Any, dataloader: Iterable, executor: BaseExecutor): - self.action_interpreter = action_interpreter - self.state_interpreter = state_interpreter + self.action = action + self.observation = observation self.reward = reward self.dataloader = dataloader self.executor = executor + self.inner_frequency = self.executor.get_all_executor()[-1].time_per_step + @property def action_space(self): return self.action.action_space @@ -68,32 +228,53 @@ class OrderEnv(gym.Env): def observation_space(self): return self.observation.observation_space + def retrieve_data(self, cur_order: Order): + return D.features( + [cur_order.stock_id], + ['$open', '$close', '$high', '$low', '$volume'], + start_time=cur_order.start_time.date(), + end_time=cur_order.end_time.date(), + freq=self.inner_frequency + ) + + def initialize_state(self): + self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) + return EpisodicState() + + def update_state(self, action): + trade_decision = action + execute_result = self.executor.execute(trade_decision) + def reset(self): try: - self.cur_order = next(self.dataloader) + cur_order = next(self.dataloader) except StopIteration: self.dataloader = None return None - self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) - self.level_infra = self.executor.get_level_infra() + self.cur_sample = self._retrieve_data(cur_order) self.execute_result = [] + self.ep_state = self.initialize_state() + + self.action_history = np.full(self.MAX_STEPS, np.nan) + return self.observation(self.cur_sample, self.ep_state) + # TODO: how to fetch data 
after feature engineering? # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass - return self.state_interpreter(self.cur_order, self.level_infra) + return self.observation def step(self, action): assert self.dataloader is not None assert not self.executor.finished() - trade_decision = self.action_interpreter(action) - self.execute_result.extend(self.executor.execute(trade_decision)) - reward, rew_info = self.reward() + exec_vol = self.action(action, self.ep_state) + step_state = self.ep_state.step(exec_vol) + + reward, rew_info = self.reward(self.ep_state, step_state) - done = self.executor.finished() info = { 'action_history': self.action_history, 'category': self.ep_state.flow_dir.value, @@ -102,31 +283,45 @@ class OrderEnv(gym.Env): if self.ep_state.done: info['logs'] = self.ep_state.logs() info['index'] = { - 'ins': self._sample.ins, - 'date': self._sample.date + 'ins': self.cur_sample.ins, + 'date': self.cur_sample.date } - # TODO: how to collect metrics - return self.state_interpreter(self.cur_order, self.level_infra), reward, done, info + return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info + + +def _init_qlib(): + provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir + if not exists_qlib_data(provider_uri): + print(f"Qlib data is not found in {provider_uri}") + GetData().qlib_data(target_dir=provider_uri, region=REG_CN) + qlib.init(provider_uri=provider_uri, region=REG_CN) def _main(): - executor_config = { - "class": "SimulatorExecutor", - "module_path": "qlib.backtest.executor", - "kwargs": { - "time_per_step": "day", - "verbose": True, - "generate_report": True, - } - } + _init_qlib() + # TODO: why is there a benchmark? 
trade_start_time = "2017-01-01" trade_end_time = "2020-08-01" benchmark = "SH000300" + time_per_step = "day" + executor_config = { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": time_per_step, + "verbose": True, + "generate_report": False, + } + } executor = get_executor( - trade_start_time, trade_end_time, executor_config, - benchmark, 1000000000, exchange_kwargs={ + trade_start_time, + trade_end_time, + executor_config, + benchmark, + 1000000000, + exchange_kwargs={ "freq": "day", "limit_threshold": 0.095, "deal_price": "close", @@ -135,3 +330,27 @@ def _main(): "min_cost": 5, } ) + + import pdb; pdb.set_trace() + + observation = DummyCallable() + action = DummyCallable() + reward_fn = DummyCallable() + # TODO: this probably won't work with multiprocess + dataloader = iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)) + + def dummy_env(): return OrderEnv(observation, action, reward_fn, dataloader, executor) + policy = DummyPolicy() + + # env = dummy_env() + # obs = env.reset() + # print(obs.__dict__) + + envs = DummyVectorEnv([dummy_env for _ in range(4)]) + test_collector = Collector(policy, envs) + policy.eval() + test_collector.collect(n_episode=10) + + +if __name__ == '__main__': + _main() From 3200bb88c85a754b0282832741e3e0a2258e88b1 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 15:11:38 +0800 Subject: [PATCH 05/28] Update an initial version of RL --- rl_playground.py | 293 +++++++++++++++++++++++++++++++---------------- 1 file changed, 194 insertions(+), 99 deletions(-) diff --git a/rl_playground.py b/rl_playground.py index de1fb15dd..cac9134c6 100644 --- a/rl_playground.py +++ b/rl_playground.py @@ -1,10 +1,12 @@ import pickle -from dataclasses import dataclass -from typing import Iterable, Any +from dataclasses import dataclass, asdict +from typing import Iterable, Any, Optional, Tuple, Dict -import numpy as np import gym +import numpy as np +import 
pandas as pd import qlib +from gym import spaces from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order from qlib.config import REG_CN from qlib.data import D @@ -17,7 +19,10 @@ from tianshou.env import DummyVectorEnv from tianshou.policy import BasePolicy -def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}): +MAX_STEPS = 10 + + +def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}) -> BaseExecutor: trade_account = Account( init_cash=account, benchmark_config={ @@ -34,6 +39,19 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1 return trade_executor +def price_advantage(exec_price: float, baseline_price: float, direction: int) -> float: + if baseline_price == 0: + return 0. + if direction == 1: + return (1 - exec_price / baseline_price) * 10000 + else: + return (exec_price / baseline_price - 1) * 10000 + + +def _to_int32(val): return np.array(int(val), dtype=np.int32) +def _to_float32(val): return np.array(val, dtype=np.float32) + + class QlibOrderDataset(Dataset): def __init__(self, order_file): with open(order_file, 'rb') as f: @@ -46,18 +64,10 @@ class QlibOrderDataset(Dataset): return self.orders[index] -class DummyCallable: - def __call__(self, *args, **kwargs): - if args: - return args[0] - if kwargs: - for v in kwargs.values(): - return v - - class DummyPolicy(BasePolicy): def forward(self, batch, state=None, **kwargs): - return Batch(act=0) + print(batch) + return Batch(act=np.random.randint(5)) def learn(self, *args, **kwargs): pass @@ -69,20 +79,22 @@ class EpisodicState: A simplified data structure for RL-related components to process observations and rewards """ # requirements - start_time: int - end_time: int - num_step: int - time_per_step: int + stock_id: int + start_time: pd.Timestamp + end_time: pd.Timestamp + direction: int target: float - target_limit: float - vol_limit: 
Optional[float] - flow_dir: int + num_step: int + + # simplified market data used to calculate backtest metrics + # this may contains information from future so be careful market_price: np.ndarray market_vol: np.ndarray # agent state - cur_time: int = -1 + cur_time: Optional[pd.Timestamp] = None cur_step: int = 0 + cur_tick: int = 0 # tick is the most fine-grained time unit (typically minute) done: bool = False position: Optional[float] = None exec_vol: Optional[np.ndarray] = None @@ -100,6 +112,7 @@ class EpisodicState: def __post_init__(self): assert self.target >= 0 + assert len(self.market_price) == len(self.market_vol) self.cur_time = self.start_time self.position = self.target self.position_history = np.full((self.num_step + 1), np.nan) @@ -118,10 +131,10 @@ class EpisodicState: self.exec_avg_price = market_price[0] else: self.exec_avg_price = np.average(market_price, weights=self.exec_vol) - self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.flow_dir) - self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.flow_dir) - self.fulfill_rate = (self.target - self.position) / self.target_limit - if abs(self.fulfill_rate - 1.0) < EPSILON: + self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.direction) + self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.direction) + self.fulfill_rate = (self.target - self.position) / self.target + if abs(self.fulfill_rate - 1.0) < 1e-5: self.fulfill_rate = 1.0 self.fulfill_rate *= 100 @@ -139,35 +152,10 @@ class EpisodicState: } return logs - def next_duration(self) -> int: - return min(self.time_per_step, self.end_time - self.cur_time) - - def step(self, exec_vol): - self.last_step_duration = len(exec_vol) - self.position -= exec_vol.sum() - assert self.position > -EPSILON and (exec_vol > -EPSILON).all(), \ - f'Execution volume is invalid: {exec_vol} (position = {self.position})' - self.position_history[self.cur_step + 1] = 
self.position - self.cur_time += self.last_step_duration - self.cur_step += 1 - if self.cur_step == self.num_step: - assert self.cur_time == self.end_time - if self.exec_vol is None: - self.exec_vol = exec_vol - else: - self.exec_vol = np.concatenate((self.exec_vol, exec_vol)) - - self.done = self.position < EPSILON or self.cur_step == self.num_step - if self.done: - self.update_stats() - - l, r = self.cur_time - self.last_step_duration - self.start_time, self.cur_time - self.start_time - assert 0 <= l < r - return StepState(self.exec_vol[l:r], self.market_vol[l:r], self.market_price[l:r], self) - @dataclass class StepState: + # market info and execution volume for current step exec_vol: np.ndarray market_vol: np.ndarray market_price: np.ndarray @@ -189,23 +177,109 @@ class StepState: else: self.exec_avg_price = np.average(self.market_price, weights=self.market_vol) self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap, - self.episode_state.flow_dir) + self.episode_state.direction) self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap, - self.episode_state.flow_dir) + self.episode_state.direction) -def price_advantage(exec_price: float, baseline_price: float, flow: FlowDirection) -> float: - if baseline_price == 0: +class Observation: + def __init__(self, time_per_step): + self.time_per_step = time_per_step + + def __call__(self, ep_state: EpisodicState) -> Any: + obs = self.observe(ep_state) + if not self.validate(obs): + raise ValueError(f'Observation space does not contain obs. 
Space: {self.observation_space} Sample: {obs}') + return obs + + def validate(self, obs: Any) -> bool: + return self.observation_space.contains(obs) + + @property + def observation_space(self): + space = { + 'direction': spaces.Discrete(2), + 'cur_step': spaces.Box(0, MAX_STEPS - 1, shape=(), dtype=np.int32), + 'num_step': spaces.Box(MAX_STEPS, MAX_STEPS, shape=(), dtype=np.int32), + 'target': spaces.Box(-1e-5, np.inf, shape=()), + 'position': spaces.Box(-1e-5, np.inf, shape=()), + 'features': spaces.Box(-np.inf, np.inf, shape=(5, )) + } + return spaces.Dict(space) + + def observe(self, ep_state: EpisodicState) -> Any: + return { + 'acquiring': _to_int32(ep_state.direction), + 'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)), + 'num_step': _to_int32(ep_state.num_step), + 'target': _to_float32(ep_state.target), + 'position': _to_float32(ep_state.position), + 'features': D.features( + [ep_state.stock_id], + ['$open', '$close', '$high', '$low', '$volume'], + start_time=ep_state.start_time, + end_time=ep_state.end_time, + freq=self.time_per_step + ) + } + + +class Action: + @property + def action_space(self): + return spaces.Discrete(5) + + def __call__(self, action: Any, ep_state: EpisodicState) -> Any: + if not self.validate(action): + raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}') + act_ = self.to_volume(action, ep_state) + return act_ + + def validate(self, action: Any) -> bool: + return self.action_space.contains(action) + + def to_volume(self, action: Any, ep_state: EpisodicState): + exec_vol = ep_state.position / 5 * action + if ep_state.cur_step + 1 >= ep_state.num_step: + exec_vol = ep_state.position + # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit? 
+ return exec_vol + + +class Reward: + weight = 1.0 + + def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: + rew, info = 0., {} + if ep_state.done: + ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state)) + rew += ep_rew + info.update({f'ep/{k}': v for k, v in ep_info.items()}) + st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state)) + rew += st_rew + info.update({f'st/{k}': v for k, v in st_info.items()}) + return rew * self.weight, info + + @staticmethod + def _to_tuple(x): + if isinstance(x, tuple): + return x + return x, {} + + def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]: return 0. - if flow == FlowDirection.ACQUIRE: - return (1 - exec_price / baseline_price) * 10000 - else: - return (exec_price / baseline_price - 1) * 10000 + + def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: + assert ep_state.target > 0 + baseline_price = st_state.pa_twap + pa = baseline_price * st_state.exec_vol.sum() / ep_state.target + penalty = -self.penalty * ((st_state.exec_vol / ep_state.target) ** 2).sum() + reward = pa + penalty + return reward, {'pa': pa, 'penalty': penalty} class SingleOrderEnv(gym.Env): - MAX_STEPS = 10 def __init__(self, observation: StateInterpreter, action: ActionInterpreter, @@ -228,50 +302,73 @@ class SingleOrderEnv(gym.Env): def observation_space(self): return self.observation.observation_space - def retrieve_data(self, cur_order: Order): + def retrieve_backtest_data(self, field: str): return D.features( - [cur_order.stock_id], + [self.cur_order.stock_id], ['$open', '$close', '$high', '$low', '$volume'], - start_time=cur_order.start_time.date(), - end_time=cur_order.end_time.date(), + start_time=self.cur_order.start_time, + end_time=self.cur_order.end_time, freq=self.inner_frequency - ) + )[field].to_numpy() def initialize_state(self): self.executor.reset(start_time=self.cur_order.start_time, 
end_time=self.cur_order.end_time) - return EpisodicState() + return EpisodicState( + stock_id=self.cur_order.stock_id, + start_time=self.cur_order.start_time, + end_time=self.cur_order.end_time, + direction=self.cur_order.direction, + target=self.cur_order.amount, + num_step=self.executor.trade_calendar.get_trade_len(), + market_price=self.retrieve_backtest_data('$close'), + market_vol=self.retrieve_backtest_data('$volume'), + ) - def update_state(self, action): - trade_decision = action - execute_result = self.executor.execute(trade_decision) + def update_state(self, exec_vol): + trade_step = self.trade_calendar.get_trade_step() + trade_start_time = self.executor.trade_calendar.get_step_time(trade_step) + trade_end_time = self.executor.trade_calendar.get_step_time(trade_step, shift=1) + trade_decision = Order(**asdict(self.cur_order), + start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol) + execute_result = self.executor.execute([trade_decision]) + cur_tick = self.ep_state.cur_tick + + inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) + ticks_this_step = len(inner_exec_vol) + state = self.ep_state + state.cur_step = trade_step = self.executor.trade_calendar.get_trade_step() + state.cur_time = self.executor.trade_calendar.get_step_time(trade_step) + state.cur_tick += ticks_this_step + state.position -= np.sum(inner_exec_vol) + state.position_history[trade_step] = state.position + state.exec_vol = inner_exec_vol if state.exec_vol is None else np.concatenate((state.exec_vol, inner_exec_vol)) + + state.done = self.executor.finished() + if state.done: + state.update_stats() + + l, r = cur_tick, cur_tick + ticks_this_step + assert 0 <= l < r + return StepState(inner_exec_vol, state.market_vol[l:r], state.market_price[l:r], state) def reset(self): try: - cur_order = next(self.dataloader) + self.cur_order = next(self.dataloader) except StopIteration: self.dataloader = None return None - self.cur_sample = 
self._retrieve_data(cur_order) self.execute_result = [] self.ep_state = self.initialize_state() - self.action_history = np.full(self.MAX_STEPS, np.nan) + self.action_history = np.full(self.ep_state.num_step, np.nan) return self.observation(self.cur_sample, self.ep_state) - - # TODO: how to fetch data after feature engineering? - - # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass - return self.observation - def step(self, action): assert self.dataloader is not None - assert not self.executor.finished() - exec_vol = self.action(action, self.ep_state) - step_state = self.ep_state.step(exec_vol) + step_state = self.update_state(exec_vol) reward, rew_info = self.reward(self.ep_state, step_state) @@ -283,8 +380,8 @@ class SingleOrderEnv(gym.Env): if self.ep_state.done: info['logs'] = self.ep_state.logs() info['index'] = { - 'ins': self.cur_sample.ins, - 'date': self.cur_sample.date + 'ins': self.ep_state.stock_id, + 'date': self.ep_state.start_time, } return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info @@ -331,25 +428,23 @@ def _main(): } ) - import pdb; pdb.set_trace() + observation = Observation(time_per_step) + action = Action() + reward_fn = Reward() - observation = DummyCallable() - action = DummyCallable() - reward_fn = DummyCallable() - # TODO: this probably won't work with multiprocess - dataloader = iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)) - - def dummy_env(): return OrderEnv(observation, action, reward_fn, dataloader, executor) + def dummy_env(): return SingleOrderEnv( + observation, action, reward_fn, + DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True), executor) policy = DummyPolicy() - # env = dummy_env() - # obs = env.reset() - # print(obs.__dict__) + env = dummy_env() + obs = env.reset() + print(obs) - envs = DummyVectorEnv([dummy_env for _ in range(4)]) - test_collector = Collector(policy, envs) - policy.eval() - 
test_collector.collect(n_episode=10) + # envs = DummyVectorEnv([dummy_env for _ in range(4)]) + # test_collector = Collector(policy, envs) + # policy.eval() + # test_collector.collect(n_episode=10) if __name__ == '__main__': From d515efb46e069a7334e5ee26cebb6a3adffc7908 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 16:41:18 +0800 Subject: [PATCH 06/28] Finish RL dummy example --- qlib/backtest/order.py | 4 +- rl_playground.py | 345 +++++++++++++++++++++-------------------- 2 files changed, 183 insertions(+), 166 deletions(-) diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index e4bf41f1e..47a859aa3 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. import pandas as pd from dataclasses import dataclass, field -from typing import ClassVar +from typing import ClassVar, Optional @dataclass @@ -26,7 +26,7 @@ class Order: end_time: pd.Timestamp direction: int factor: float - deal_amount: float = field(init=False) + deal_amount: Optional[float] = None SELL: ClassVar[int] = 0 BUY: ClassVar[int] = 1 diff --git a/rl_playground.py b/rl_playground.py index cac9134c6..482615215 100644 --- a/rl_playground.py +++ b/rl_playground.py @@ -1,5 +1,6 @@ import pickle from dataclasses import dataclass, asdict +from pprint import pprint from typing import Iterable, Any, Optional, Tuple, Dict import gym @@ -22,7 +23,7 @@ from tianshou.policy import BasePolicy MAX_STEPS = 10 -def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}) -> BaseExecutor: +def get_executor(start_time, end_time, executor, exchange, benchmark="SH000300", account=1e9) -> BaseExecutor: trade_account = Account( init_cash=account, benchmark_config={ @@ -31,9 +32,8 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1 "end_time": end_time, }, ) - trade_exchange = get_exchange(**exchange_kwargs) - common_infra = 
CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange) + common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra) return trade_executor @@ -48,31 +48,6 @@ def price_advantage(exec_price: float, baseline_price: float, direction: int) -> return (exec_price / baseline_price - 1) * 10000 -def _to_int32(val): return np.array(int(val), dtype=np.int32) -def _to_float32(val): return np.array(val, dtype=np.float32) - - -class QlibOrderDataset(Dataset): - def __init__(self, order_file): - with open(order_file, 'rb') as f: - self.orders = pickle.load(f) - - def __len__(self): - return len(self.orders) - - def __getitem__(self, index): - return self.orders[index] - - -class DummyPolicy(BasePolicy): - def forward(self, batch, state=None, **kwargs): - print(batch) - return Batch(act=np.random.randint(5)) - - def learn(self, *args, **kwargs): - pass - - @dataclass class EpisodicState: """ @@ -182,103 +157,6 @@ class StepState: self.episode_state.direction) -class Observation: - def __init__(self, time_per_step): - self.time_per_step = time_per_step - - def __call__(self, ep_state: EpisodicState) -> Any: - obs = self.observe(ep_state) - if not self.validate(obs): - raise ValueError(f'Observation space does not contain obs. 
Space: {self.observation_space} Sample: {obs}') - return obs - - def validate(self, obs: Any) -> bool: - return self.observation_space.contains(obs) - - @property - def observation_space(self): - space = { - 'direction': spaces.Discrete(2), - 'cur_step': spaces.Box(0, MAX_STEPS - 1, shape=(), dtype=np.int32), - 'num_step': spaces.Box(MAX_STEPS, MAX_STEPS, shape=(), dtype=np.int32), - 'target': spaces.Box(-1e-5, np.inf, shape=()), - 'position': spaces.Box(-1e-5, np.inf, shape=()), - 'features': spaces.Box(-np.inf, np.inf, shape=(5, )) - } - return spaces.Dict(space) - - def observe(self, ep_state: EpisodicState) -> Any: - return { - 'acquiring': _to_int32(ep_state.direction), - 'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)), - 'num_step': _to_int32(ep_state.num_step), - 'target': _to_float32(ep_state.target), - 'position': _to_float32(ep_state.position), - 'features': D.features( - [ep_state.stock_id], - ['$open', '$close', '$high', '$low', '$volume'], - start_time=ep_state.start_time, - end_time=ep_state.end_time, - freq=self.time_per_step - ) - } - - -class Action: - @property - def action_space(self): - return spaces.Discrete(5) - - def __call__(self, action: Any, ep_state: EpisodicState) -> Any: - if not self.validate(action): - raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}') - act_ = self.to_volume(action, ep_state) - return act_ - - def validate(self, action: Any) -> bool: - return self.action_space.contains(action) - - def to_volume(self, action: Any, ep_state: EpisodicState): - exec_vol = ep_state.position / 5 * action - if ep_state.cur_step + 1 >= ep_state.num_step: - exec_vol = ep_state.position - # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit? 
- return exec_vol - - -class Reward: - weight = 1.0 - - def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: - rew, info = 0., {} - if ep_state.done: - ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state)) - rew += ep_rew - info.update({f'ep/{k}': v for k, v in ep_info.items()}) - st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state)) - rew += st_rew - info.update({f'st/{k}': v for k, v in st_info.items()}) - return rew * self.weight, info - - @staticmethod - def _to_tuple(x): - if isinstance(x, tuple): - return x - return x, {} - - def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]: - return 0. - - def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: - assert ep_state.target > 0 - baseline_price = st_state.pa_twap - pa = baseline_price * st_state.exec_vol.sum() / ep_state.target - penalty = -self.penalty * ((st_state.exec_vol / ep_state.target) ** 2).sum() - reward = pa + penalty - return reward, {'pa': pa, 'penalty': penalty} - - - class SingleOrderEnv(gym.Env): def __init__(self, observation: StateInterpreter, @@ -313,7 +191,7 @@ class SingleOrderEnv(gym.Env): def initialize_state(self): self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) - return EpisodicState( + state = EpisodicState( stock_id=self.cur_order.stock_id, start_time=self.cur_order.start_time, end_time=self.cur_order.end_time, @@ -323,29 +201,37 @@ class SingleOrderEnv(gym.Env): market_price=self.retrieve_backtest_data('$close'), market_vol=self.retrieve_backtest_data('$volume'), ) + state.cur_step = self.executor.trade_calendar.get_trade_step() + assert state.cur_step == 0 + state.cur_time, _ = self.executor.trade_calendar.get_step_time(state.cur_step) + return state def update_state(self, exec_vol): - trade_step = self.trade_calendar.get_trade_step() - trade_start_time = 
self.executor.trade_calendar.get_step_time(trade_step) - trade_end_time = self.executor.trade_calendar.get_step_time(trade_step, shift=1) - trade_decision = Order(**asdict(self.cur_order), - start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol) + calendar = self.executor.trade_calendar + state = self.ep_state + + trade_step = calendar.get_trade_step() + trade_start_time, trade_end_time = calendar.get_step_time(trade_step) + order_kwargs = asdict(self.cur_order) + order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol) + trade_decision = Order(**order_kwargs) execute_result = self.executor.execute([trade_decision]) - cur_tick = self.ep_state.cur_tick + cur_tick = state.cur_tick inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) ticks_this_step = len(inner_exec_vol) - state = self.ep_state - state.cur_step = trade_step = self.executor.trade_calendar.get_trade_step() - state.cur_time = self.executor.trade_calendar.get_step_time(trade_step) + state.cur_step = trade_step = calendar.get_trade_step() state.cur_tick += ticks_this_step state.position -= np.sum(inner_exec_vol) state.position_history[trade_step] = state.position - state.exec_vol = inner_exec_vol if state.exec_vol is None else np.concatenate((state.exec_vol, inner_exec_vol)) - state.done = self.executor.finished() + state.exec_vol = inner_exec_vol if state.exec_vol is None else \ + np.concatenate((state.exec_vol, inner_exec_vol)) + if state.done: state.update_stats() + else: + state.cur_time, _ = calendar.get_step_time(trade_step) l, r = cur_tick, cur_tick + ticks_this_step assert 0 <= l < r @@ -362,19 +248,23 @@ class SingleOrderEnv(gym.Env): self.ep_state = self.initialize_state() self.action_history = np.full(self.ep_state.num_step, np.nan) - return self.observation(self.cur_sample, self.ep_state) + return self.observation(self.ep_state) def step(self, action): assert self.dataloader is not None + assert not 
self.executor.finished() + self.action_history[self.ep_state.cur_step] = action exec_vol = self.action(action, self.ep_state) step_state = self.update_state(exec_vol) + if self.executor.finished(): + assert self.ep_state.done reward, rew_info = self.reward(self.ep_state, step_state) info = { 'action_history': self.action_history, - 'category': self.ep_state.flow_dir.value, + 'category': self.ep_state.direction, 'reward': rew_info } if self.ep_state.done: @@ -383,8 +273,9 @@ class SingleOrderEnv(gym.Env): 'ins': self.ep_state.stock_id, 'date': self.ep_state.start_time, } + pprint(info) - return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info + return self.observation(self.ep_state), reward, self.ep_state.done, info def _init_qlib(): @@ -412,39 +303,165 @@ def _main(): "generate_report": False, } } - executor = get_executor( - trade_start_time, - trade_end_time, - executor_config, - benchmark, - 1000000000, - exchange_kwargs={ - "freq": "day", - "limit_threshold": 0.095, - "deal_price": "close", - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5, - } + exchange = get_exchange( + freq="day", + limit_threshold=0.095, + deal_price="close", + open_cost=0.0005, + close_cost=0.0015, + min_cost=5 ) observation = Observation(time_per_step) action = Action() reward_fn = Reward() - def dummy_env(): return SingleOrderEnv( - observation, action, reward_fn, - DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True), executor) + def dummy_env(): + executor = get_executor( + trade_start_time, + trade_end_time, + executor_config, + exchange, + benchmark, + 1000000000, + ) + return SingleOrderEnv( + observation, action, reward_fn, + iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)), executor) + policy = DummyPolicy() - env = dummy_env() - obs = env.reset() - print(obs) + envs = DummyVectorEnv([dummy_env for _ in range(4)]) + test_collector = Collector(policy, envs) + policy.eval() + 
test_collector.collect(n_episode=10) - # envs = DummyVectorEnv([dummy_env for _ in range(4)]) - # test_collector = Collector(policy, envs) - # policy.eval() - # test_collector.collect(n_episode=10) + +### This is a full RL strategy ### + + +class QlibOrderDataset(Dataset): + def __init__(self, order_file): + with open(order_file, 'rb') as f: + self.orders = pickle.load(f) + + def __len__(self): + return len(self.orders) + + def __getitem__(self, index): + return self.orders[index] + + +class DummyPolicy(BasePolicy): + def forward(self, batch, state=None, **kwargs): + return Batch(act=np.random.randint(0, 5, size=(len(batch), ))) + + def learn(self, *args, **kwargs): + pass + + +class Observation: + def __init__(self, time_per_step): + self.time_per_step = time_per_step + + def __call__(self, ep_state: EpisodicState) -> Any: + obs = self.observe(ep_state) + if not self.validate(obs): + raise ValueError(f'Observation space does not contain obs. Space: {self.observation_space} Sample: {obs}') + return obs + + def validate(self, obs: Any) -> bool: + return self.observation_space.contains(obs) + + @property + def observation_space(self): + space = { + 'direction': spaces.Discrete(2), + 'cur_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32), + 'num_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32), + 'target': spaces.Box(-1e-5, np.inf, shape=()), + 'position': spaces.Box(-1e-5, np.inf, shape=()), + 'features': spaces.Box(-np.inf, np.inf, shape=(5, )) + } + return spaces.Dict(space) + + def observe(self, ep_state: EpisodicState) -> Any: + return { + 'direction': _to_int32(ep_state.direction), + 'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)), + 'num_step': _to_int32(ep_state.num_step), + 'target': _to_float32(ep_state.target), + 'position': _to_float32(ep_state.position), + 'features': D.features( + [ep_state.stock_id], + ['$open', '$close', '$high', '$low', '$volume'], + start_time=ep_state.start_time, + end_time=ep_state.end_time, + 
freq=self.time_per_step + ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy(), + } + + +class Action: + denominator = 4 + + @property + def action_space(self): + return spaces.Discrete(self.denominator + 1) + + def __call__(self, action: Any, ep_state: EpisodicState) -> Any: + if not self.validate(action): + raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}') + act_ = self.to_volume(action, ep_state) + return act_ + + def validate(self, action: Any) -> bool: + return self.action_space.contains(action) + + def to_volume(self, action: Any, ep_state: EpisodicState): + exec_vol = ep_state.position / self.denominator * action + if ep_state.cur_step + 1 >= ep_state.num_step: + exec_vol = ep_state.position + # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit? + return exec_vol + + +class Reward: + weight = 1.0 + + def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: + rew, info = 0., {} + if ep_state.done: + ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state)) + rew += ep_rew + info.update({f'ep/{k}': v for k, v in ep_info.items()}) + st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state)) + rew += st_rew + info.update({f'st/{k}': v for k, v in st_info.items()}) + return rew * self.weight, info + + @staticmethod + def _to_tuple(x): + if isinstance(x, tuple): + return x + return x, {} + + def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]: + return 0. 
+ + def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: + assert ep_state.target > 0 + baseline_price = st_state.pa_twap + pa = baseline_price * st_state.exec_vol.sum() / ep_state.target + penalty = -100 * ((st_state.exec_vol / ep_state.target) ** 2).sum() # penalize too much volume at one step + reward = pa + penalty + return reward, {'pa': pa, 'penalty': penalty} + + +def _to_int32(val): return np.array(int(val), dtype=np.int32) +def _to_float32(val): return np.array(val, dtype=np.float32) + +### End of RL strategy ### if __name__ == '__main__': From cc8339acd925a2df0027ae64bb0b8a4a360ed504 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 16:49:52 +0800 Subject: [PATCH 07/28] Add a few comments --- rl_orders | Bin 0 -> 3464 bytes rl_playground.py | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 rl_orders diff --git a/rl_orders b/rl_orders new file mode 100644 index 0000000000000000000000000000000000000000..7902b901c000bfd82fb7fcc0386c588f3f78cbb4 GIT binary patch literal 3464 zcmai$eM}Q~7{^;->?j+k)cJzoL`)bn&F}ioCT}CMX-(Nc=HkTe`a-UQzU+G4Bus`Q zI>BQSJ%TO}L`B0UZjDnjmStp_EHR5j&`k+(i2|Dw#>|9GmWBPU9rq*4-LLmYFYP_~ z-1m8&@9%k97u&MuNk#Z7=QFwFx2oKBjh%8-vaSHD@i9&p!*h=nhwn%DXZG@YU=$Hx zeU3_-+sSi8=}SfcOhCtTag@gw^s+p+%p3Iht2GWE zVH$dr2E9}wiP)^8GkM_zf_6G4Qc*e%7IFswTD`%@(*z_fDI|2)$?me_$pzt9dbveG zub@-uH2P_Jtwl+v!=Uwrv709+a%EvAg9!UZb-%Y|UHDQ}%7Dpe7Gb78?v?ND zOvJ;c2G2?=6Z~Qzvqjjb>Wh7UcMV5+Dwk$~(<%vrH3Pt?6Je)8NAJDdB)qbph&f3Z zR7L`Tl>J^@(iBX}!o$vOJcNOvZZ?uRz2~V7cv$N`ZODF0d10vz53|RWOTvWXWLR*}S6m-_MO`qBw?_Vz zOvwfaHPdTR&GPqMiNE!58D3eX@~EUTAx)hQS%Yc(Zt9o#=kT!BHPTZ$G zyve?R77v@K@XIPQk;rE=LqiGwG)HMbd#Oea21ql4)sN1mzA(KGudM4wP7)?GK&vN3 zpB-%P?w@Rk#lub{zbe^Hp=M?if;GDQ>ANoK@vzksJ+jK+zXnm+{qJwi7HzqS_u2N2 z?U8{0?M*?M&Wx<__>JOEE~Uiam2G|PUCA_e=-?4BqrI6j=WQ+x#p7X*KD#TKrl5?> zMmylzdy8|`Em7uK_o75;Wx{bHkuvqQnz8(kpTaA9N7X5>%z(zWZ+emj5i%p_1m(_;NTX0w)?lfma&mM 
zJgkmgFAFm**a5@A(LP&wl!fx_;d;4l0>gssba_1WTw7=aU$fym6_POMGq?lL)OfK! zcs@f#;?qp&|4=qfok=U!?D*>1*`lPCcvwS^^mYpGO%nNxFPkXaoCNT&+GB*IvhX|u zNLk6u!L7u)O?cSd99D7(3pInIR!q~dZQOS8Oq80b_n(qe28RZM{b0Er^8OlS1gRdL Jo`< @dataclass class EpisodicState: """ - A simplified data structure for RL-related components to process observations and rewards + A simplified data structure as the input of RL-related components to calculate observations and rewards. + Some of the metrics info are calculated on-the-fly in this class. """ # requirements stock_id: int @@ -181,6 +182,7 @@ class SingleOrderEnv(gym.Env): return self.observation.observation_space def retrieve_backtest_data(self, field: str): + # Retrieve backtest data for RL-specific use (including reward calculation) return D.features( [self.cur_order.stock_id], ['$open', '$close', '$high', '$low', '$volume'], @@ -190,6 +192,7 @@ class SingleOrderEnv(gym.Env): )[field].to_numpy() def initialize_state(self): + # Synchronous state for executor to EpisodicState self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) state = EpisodicState( stock_id=self.cur_order.stock_id, @@ -207,6 +210,7 @@ class SingleOrderEnv(gym.Env): return state def update_state(self, exec_vol): + # Synchronous exec_vol to executor and synchronous back to EpisodicState calendar = self.executor.trade_calendar state = self.ep_state @@ -273,6 +277,7 @@ class SingleOrderEnv(gym.Env): 'ins': self.ep_state.stock_id, 'date': self.ep_state.start_time, } + # TODO: collect logs pprint(info) return self.observation(self.ep_state), reward, self.ep_state.done, info @@ -327,13 +332,18 @@ def _main(): ) return SingleOrderEnv( observation, action, reward_fn, - iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)), executor) + iter(DataLoader(QlibOrderDataset('rl_orders'), batch_size=None, shuffle=True)), executor) policy = DummyPolicy() + # This can not be replaced with SubprocVectorEnv + # File 
"/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor + # p = Pool(processes=workers) + # AssertionError: daemonic processes are not allowed to have children envs = DummyVectorEnv([dummy_env for _ in range(4)]) test_collector = Collector(policy, envs) policy.eval() + # TODO: create a queue for all orders and make it auto-complete when all the orders are processed test_collector.collect(n_episode=10) From 231440561324e3592e7eb5ed82fafe8a2a9d55ce Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 16:53:39 +0800 Subject: [PATCH 08/28] Rename files --- .../nested_decision_execution/assets/orders | Bin .../nested_decision_execution/rl_dummy.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename rl_orders => examples/nested_decision_execution/assets/orders (100%) rename rl_playground.py => examples/nested_decision_execution/rl_dummy.py (99%) diff --git a/rl_orders b/examples/nested_decision_execution/assets/orders similarity index 100% rename from rl_orders rename to examples/nested_decision_execution/assets/orders diff --git a/rl_playground.py b/examples/nested_decision_execution/rl_dummy.py similarity index 99% rename from rl_playground.py rename to examples/nested_decision_execution/rl_dummy.py index fa2022dcb..1ea444cdf 100644 --- a/rl_playground.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -332,7 +332,7 @@ def _main(): ) return SingleOrderEnv( observation, action, reward_fn, - iter(DataLoader(QlibOrderDataset('rl_orders'), batch_size=None, shuffle=True)), executor) + iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor) policy = DummyPolicy() From f5ac6230e13e80b1eee1a33ecb0e590b3e072758 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 22:04:54 +0800 Subject: [PATCH 09/28] Refactor for strategy --- .../nested_decision_execution/rl_dummy.py | 134 ++++++++++-------- 1 file changed, 71 insertions(+), 63 deletions(-) diff --git 
a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py index 1ea444cdf..3eec91789 100644 --- a/examples/nested_decision_execution/rl_dummy.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -1,7 +1,7 @@ import pickle from dataclasses import dataclass, asdict from pprint import pprint -from typing import Iterable, Any, Optional, Tuple, Dict +from typing import Iterable, Any, Optional, Tuple, Dict, List import gym import numpy as np @@ -128,6 +128,48 @@ class EpisodicState: } return logs + @classmethod + def from_order_and_executor(cls, order: Order, executor: BaseExecutor, frequency: str) -> "EpisodicState": + # Synchronous state for executor to EpisodicState + executor.reset(start_time=order.start_time, end_time=order.end_time) + state = cls( + stock_id=order.stock_id, + start_time=order.start_time, + end_time=order.end_time, + direction=order.direction, + target=order.amount, + num_step=executor.trade_calendar.get_trade_len(), + market_price=_retrieve_backtest_data(order, '$close', frequency), + market_vol=_retrieve_backtest_data(order, '$volume', frequency), + ) + state.cur_step = executor.trade_calendar.get_trade_step() + assert state.cur_step == 0 + state.cur_time, _ = executor.trade_calendar.get_step_time(state.cur_step) + return state + + def update(self, execute_result: List[Order], executor: BaseExecutor) -> "StepState": + exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) + # Synchronous exec_vol to executor and synchronous back to EpisodicState + calendar = executor.trade_calendar + cur_tick = self.cur_tick + ticks_this_step = len(exec_vol) + self.cur_step = trade_step = calendar.get_trade_step() + self.cur_tick += ticks_this_step + self.position -= np.sum(exec_vol) + self.position_history[trade_step] = self.position + self.done = executor.finished() + self.exec_vol = exec_vol if self.exec_vol is None else \ + np.concatenate((self.exec_vol, exec_vol)) + + if self.done: + 
self.update_stats() + else: + self.cur_time, _ = calendar.get_step_time(trade_step) + + l, r = cur_tick, cur_tick + ticks_this_step + assert 0 <= l < r + return StepState(exec_vol, self.market_vol[l:r], self.market_price[l:r], self) + @dataclass class StepState: @@ -158,6 +200,28 @@ class StepState: self.episode_state.direction) +def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndarray: + # Retrieve backtest data for RL-specific use (including reward calculation) + return D.features( + [order.stock_id], + ['$open', '$close', '$high', '$low', '$volume'], + start_time=order.start_time, + end_time=order.end_time, + freq=frequency + )[field].to_numpy() + + +def create_sub_order(exec_vol: float, executor: BaseExecutor, original_order: Order) -> Order: + # Convert a real number to an order + calendar = executor.trade_calendar + trade_step = calendar.get_trade_step() + trade_start_time, trade_end_time = calendar.get_step_time(trade_step) + order_kwargs = asdict(original_order) + order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol) + trade_decision = Order(**order_kwargs) + return trade_decision + + class SingleOrderEnv(gym.Env): def __init__(self, observation: StateInterpreter, @@ -181,66 +245,6 @@ class SingleOrderEnv(gym.Env): def observation_space(self): return self.observation.observation_space - def retrieve_backtest_data(self, field: str): - # Retrieve backtest data for RL-specific use (including reward calculation) - return D.features( - [self.cur_order.stock_id], - ['$open', '$close', '$high', '$low', '$volume'], - start_time=self.cur_order.start_time, - end_time=self.cur_order.end_time, - freq=self.inner_frequency - )[field].to_numpy() - - def initialize_state(self): - # Synchronous state for executor to EpisodicState - self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) - state = EpisodicState( - stock_id=self.cur_order.stock_id, - 
start_time=self.cur_order.start_time, - end_time=self.cur_order.end_time, - direction=self.cur_order.direction, - target=self.cur_order.amount, - num_step=self.executor.trade_calendar.get_trade_len(), - market_price=self.retrieve_backtest_data('$close'), - market_vol=self.retrieve_backtest_data('$volume'), - ) - state.cur_step = self.executor.trade_calendar.get_trade_step() - assert state.cur_step == 0 - state.cur_time, _ = self.executor.trade_calendar.get_step_time(state.cur_step) - return state - - def update_state(self, exec_vol): - # Synchronous exec_vol to executor and synchronous back to EpisodicState - calendar = self.executor.trade_calendar - state = self.ep_state - - trade_step = calendar.get_trade_step() - trade_start_time, trade_end_time = calendar.get_step_time(trade_step) - order_kwargs = asdict(self.cur_order) - order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol) - trade_decision = Order(**order_kwargs) - execute_result = self.executor.execute([trade_decision]) - cur_tick = state.cur_tick - - inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) - ticks_this_step = len(inner_exec_vol) - state.cur_step = trade_step = calendar.get_trade_step() - state.cur_tick += ticks_this_step - state.position -= np.sum(inner_exec_vol) - state.position_history[trade_step] = state.position - state.done = self.executor.finished() - state.exec_vol = inner_exec_vol if state.exec_vol is None else \ - np.concatenate((state.exec_vol, inner_exec_vol)) - - if state.done: - state.update_stats() - else: - state.cur_time, _ = calendar.get_step_time(trade_step) - - l, r = cur_tick, cur_tick + ticks_this_step - assert 0 <= l < r - return StepState(inner_exec_vol, state.market_vol[l:r], state.market_price[l:r], state) - def reset(self): try: self.cur_order = next(self.dataloader) @@ -249,7 +253,9 @@ class SingleOrderEnv(gym.Env): return None self.execute_result = [] - self.ep_state = self.initialize_state() + 
self.ep_state = EpisodicState.from_order_and_executor( + self.cur_order, self.executor, self.inner_frequency + ) self.action_history = np.full(self.ep_state.num_step, np.nan) return self.observation(self.ep_state) @@ -260,7 +266,9 @@ class SingleOrderEnv(gym.Env): self.action_history[self.ep_state.cur_step] = action exec_vol = self.action(action, self.ep_state) - step_state = self.update_state(exec_vol) + trade_decision = create_sub_order(exec_vol, self.executor, self.cur_order) + execute_result = self.executor.execute([trade_decision]) + step_state = self.ep_state.update(execute_result, self.executor) if self.executor.finished(): assert self.ep_state.done From bf02fc23f8a63e901ba969b546bc45366f6038d7 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 23:20:27 +0800 Subject: [PATCH 10/28] Add RL strategy demo --- .../nested_decision_execution/rl_dummy.py | 78 +++++++++++++++---- qlib/backtest/__init__.py | 1 + 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py index 3eec91789..61f1bba59 100644 --- a/examples/nested_decision_execution/rl_dummy.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -1,17 +1,19 @@ import pickle +from collections import OrderedDict, defaultdict from dataclasses import dataclass, asdict from pprint import pprint -from typing import Iterable, Any, Optional, Tuple, Dict, List +from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List import gym import numpy as np import pandas as pd import qlib from gym import spaces -from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order +from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager from qlib.config import REG_CN from qlib.data import D from qlib.rl.interpreter import StateInterpreter, ActionInterpreter +from qlib.strategy import BaseStrategy from 
qlib.tests.data import GetData from qlib.utils import init_instance_by_config, exists_qlib_data from torch.utils.data import Dataset, DataLoader @@ -129,35 +131,36 @@ class EpisodicState: return logs @classmethod - def from_order_and_executor(cls, order: Order, executor: BaseExecutor, frequency: str) -> "EpisodicState": + def from_order_and_executor(cls, order: Order, calendar: TradeCalendarManager, frequency: str) -> "EpisodicState": # Synchronous state for executor to EpisodicState - executor.reset(start_time=order.start_time, end_time=order.end_time) state = cls( stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time, direction=order.direction, target=order.amount, - num_step=executor.trade_calendar.get_trade_len(), + num_step=calendar.get_trade_len(), market_price=_retrieve_backtest_data(order, '$close', frequency), market_vol=_retrieve_backtest_data(order, '$volume', frequency), ) - state.cur_step = executor.trade_calendar.get_trade_step() + state.cur_step = calendar.get_trade_step() assert state.cur_step == 0 - state.cur_time, _ = executor.trade_calendar.get_step_time(state.cur_step) + state.cur_time, _ = calendar.get_step_time(state.cur_step) return state - def update(self, execute_result: List[Order], executor: BaseExecutor) -> "StepState": + def update(self, execute_result: List[Order], calendar: TradeCalendarManager, done: Optional[bool] = None) -> "StepState": exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) # Synchronous exec_vol to executor and synchronous back to EpisodicState - calendar = executor.trade_calendar cur_tick = self.cur_tick ticks_this_step = len(exec_vol) self.cur_step = trade_step = calendar.get_trade_step() self.cur_tick += ticks_this_step self.position -= np.sum(exec_vol) self.position_history[trade_step] = self.position - self.done = executor.finished() + if done is not None: + self.done = done + else: + self.done = self.position < 1e-5 self.exec_vol = exec_vol if self.exec_vol 
is None else \ np.concatenate((self.exec_vol, exec_vol)) @@ -211,9 +214,8 @@ def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndar )[field].to_numpy() -def create_sub_order(exec_vol: float, executor: BaseExecutor, original_order: Order) -> Order: +def create_sub_order(exec_vol: float, calendar: TradeCalendarManager, original_order: Order) -> Order: # Convert a real number to an order - calendar = executor.trade_calendar trade_step = calendar.get_trade_step() trade_start_time, trade_end_time = calendar.get_step_time(trade_step) order_kwargs = asdict(original_order) @@ -253,8 +255,9 @@ class SingleOrderEnv(gym.Env): return None self.execute_result = [] + self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) self.ep_state = EpisodicState.from_order_and_executor( - self.cur_order, self.executor, self.inner_frequency + self.cur_order, self.executor.trade_calendar, self.inner_frequency ) self.action_history = np.full(self.ep_state.num_step, np.nan) @@ -266,9 +269,9 @@ class SingleOrderEnv(gym.Env): self.action_history[self.ep_state.cur_step] = action exec_vol = self.action(action, self.ep_state) - trade_decision = create_sub_order(exec_vol, self.executor, self.cur_order) + trade_decision = create_sub_order(exec_vol, self.executor.trade_calendar, self.cur_order) execute_result = self.executor.execute([trade_decision]) - step_state = self.ep_state.update(execute_result, self.executor) + step_state = self.ep_state.update(execute_result, self.executor.trade_calendar) if self.executor.finished(): assert self.ep_state.done @@ -291,6 +294,47 @@ class SingleOrderEnv(gym.Env): return self.observation(self.ep_state), reward, self.ep_state.done, info +class RLStrategy(BaseStrategy): + """When inference and do the backtest from end to end, use this strategy.""" + # TODO This strategy is still for code demo purpose only. + # It has not been end-to-end tested. 
+ + def __init__( + self, + observation: "Observation", + action: "Action", + policy: BasePolicy, + **kwargs + ): + super().__init__(**kwargs) + self.observation = observation + self.action = action + self.policy = policy + + def reset(self, outer_trade_decision: List[Order] = None, **kwargs): + super().reset(outer_trade_decision=outer_trade_decision, **kwargs) + if outer_trade_decision is not None: + self.states = OrderedDict() # explicitly make it ordered + for order in outer_trade_decision: + # TODO: how to get inner frequency + state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day") + self.states[order.stock_id, order.direction] = state + + def generate_trade_decision(self, execute_result=None): + # apply results from the last step + if execute_result is not None: + orders = defaultdict(list) + for order, _, __, in execute_result: + orders[order.stock_id, order.direction].append(order) + for (stock_id, direction), state in self.states.items(): + state.update(orders[stock_id, direction]) + + obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()]) + act = self.policy(obs_batch) + exec_vols = [self.action(a) for a in act.act] + return [create_sub_order(v, self.trade_calendar, order) for v in exec_vols] + + def _init_qlib(): provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir if not exists_qlib_data(provider_uri): @@ -299,7 +343,7 @@ def _init_qlib(): qlib.init(provider_uri=provider_uri, region=REG_CN) -def _main(): +def _main_tianshou(): _init_qlib() # TODO: why is there a benchmark? 
@@ -483,4 +527,4 @@ def _to_float32(val): return np.array(val, dtype=np.float32) if __name__ == '__main__': - _main() + _main_tianshou() diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index f80f7ebeb..c053269ef 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -7,6 +7,7 @@ from .executor import BaseExecutor from .backtest import backtest as backtest_func from .backtest import collect_data as data_generator from .order import Order +from .utils import TradeCalendarManager from .utils import CommonInfrastructure from .order import Order From c43805eff60475eddc5f3f17ce39936cc81de335 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Fri, 4 Jun 2021 12:20:27 +0800 Subject: [PATCH 11/28] Update end-to-end example and requirements --- .../requirements.txt | 2 + .../nested_decision_execution/rl_dummy.py | 175 +++++++++++------- 2 files changed, 113 insertions(+), 64 deletions(-) create mode 100644 examples/nested_decision_execution/requirements.txt diff --git a/examples/nested_decision_execution/requirements.txt b/examples/nested_decision_execution/requirements.txt new file mode 100644 index 000000000..2ad0a826f --- /dev/null +++ b/examples/nested_decision_execution/requirements.txt @@ -0,0 +1,2 @@ +tianshou>=0.4.1 +torch>=1.8.0 diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py index 61f1bba59..4a8f50ad0 100644 --- a/examples/nested_decision_execution/rl_dummy.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -4,12 +4,14 @@ from dataclasses import dataclass, asdict from pprint import pprint from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List +import fire import gym import numpy as np import pandas as pd import qlib from gym import spaces -from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager +from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, 
TradeCalendarManager, backtest_func +from qlib.backtest.executor import NestedExecutor, SimulatorExecutor from qlib.config import REG_CN from qlib.data import D from qlib.rl.interpreter import StateInterpreter, ActionInterpreter @@ -21,6 +23,8 @@ from tianshou.data import Batch, Collector from tianshou.env import DummyVectorEnv, SubprocVectorEnv from tianshou.policy import BasePolicy +from workflow import NestedDecisonExecutionWorkflow + MAX_STEPS = 10 @@ -324,79 +328,122 @@ class RLStrategy(BaseStrategy): # apply results from the last step if execute_result is not None: orders = defaultdict(list) - for order, _, __, in execute_result: - orders[order.stock_id, order.direction].append(order) + for e in execute_result: + orders[e[0].stock_id, e[0].direction].append(e) for (stock_id, direction), state in self.states.items(): - state.update(orders[stock_id, direction]) - + state.update(orders[stock_id, direction], self.trade_calendar) + + if not self.states: + return [] + obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()]) act = self.policy(obs_batch) - exec_vols = [self.action(a) for a in act.act] - return [create_sub_order(v, self.trade_calendar, order) for v in exec_vols] + exec_vols = [self.action(a, s) for a, s in zip(act.act, self.states.values())] + return [create_sub_order(v, self.trade_calendar, o) for v, o in zip(exec_vols, self.outer_trade_decision)] -def _init_qlib(): - provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) - qlib.init(provider_uri=provider_uri, region=REG_CN) +class RlWorkflow(NestedDecisonExecutionWorkflow): + def tianshou(self): + self._init_qlib() -def _main_tianshou(): - _init_qlib() - - # TODO: why is there a benchmark? 
- trade_start_time = "2017-01-01" - trade_end_time = "2020-08-01" - benchmark = "SH000300" - time_per_step = "day" - executor_config = { - "class": "SimulatorExecutor", - "module_path": "qlib.backtest.executor", - "kwargs": { - "time_per_step": time_per_step, - "verbose": True, - "generate_report": False, + # TODO: why is there a benchmark? + trade_start_time = "2017-01-01" + trade_end_time = "2020-08-01" + benchmark = "SH000300" + time_per_step = "day" + executor_config = { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": time_per_step, + "verbose": True, + "generate_report": False, + } } - } - exchange = get_exchange( - freq="day", - limit_threshold=0.095, - deal_price="close", - open_cost=0.0005, - close_cost=0.0015, - min_cost=5 - ) - - observation = Observation(time_per_step) - action = Action() - reward_fn = Reward() - - def dummy_env(): - executor = get_executor( - trade_start_time, - trade_end_time, - executor_config, - exchange, - benchmark, - 1000000000, + exchange = get_exchange( + freq="day", + limit_threshold=0.095, + deal_price="close", + open_cost=0.0005, + close_cost=0.0015, + min_cost=5 ) - return SingleOrderEnv( - observation, action, reward_fn, - iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor) - policy = DummyPolicy() + observation = Observation(time_per_step) + action = Action() + reward_fn = Reward() - # This can not be replaced with SubprocVectorEnv - # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor - # p = Pool(processes=workers) - # AssertionError: daemonic processes are not allowed to have children - envs = DummyVectorEnv([dummy_env for _ in range(4)]) - test_collector = Collector(policy, envs) - policy.eval() - # TODO: create a queue for all orders and make it auto-complete when all the orders are processed - test_collector.collect(n_episode=10) + def dummy_env(): + executor = get_executor( + trade_start_time, + 
trade_end_time, + executor_config, + exchange, + benchmark, + 1000000000, + ) + return SingleOrderEnv( + observation, action, reward_fn, + iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor) + + policy = DummyPolicy() + + # This can not be replaced with SubprocVectorEnv + # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor + # p = Pool(processes=workers) + # AssertionError: daemonic processes are not allowed to have children + envs = DummyVectorEnv([dummy_env for _ in range(4)]) + test_collector = Collector(policy, envs) + policy.eval() + # TODO: create a queue for all orders and make it auto-complete when all the orders are processed + test_collector.collect(n_episode=10) + + def rl_day(self, load_model: Optional[str] = None): + self._init_qlib() + model = init_instance_by_config(self.task["model"]) + dataset = init_instance_by_config(self.task["dataset"]) + if load_model is None: + self._train_model(model, dataset) + else: + model = self._load_model(load_model) + trade_start_time = "2017-01-01" + trade_end_time = "2020-08-01" + trade_account = Account( + init_cash=int(1e9), + benchmark_config={ + "benchmark": "SH000300", + "start_time": trade_start_time, + "end_time": trade_end_time, + }, + ) + exchange = get_exchange( + freq="day", + limit_threshold=0.095, + deal_price="close", + open_cost=0.0005, + close_cost=0.0015, + min_cost=5 + ) + common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) + strategy = init_instance_by_config({ + "class": "TopkDropoutStrategy", + "module_path": "qlib.contrib.strategy.model_strategy", + "kwargs": { + "model": model, + "dataset": dataset, + "topk": 50, + "n_drop": 5, + }, + }, common_infra=common_infra) + executor = NestedExecutor( + time_per_step="week", + inner_executor=SimulatorExecutor(time_per_step="day", verbose=True), + inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()), + common_infra=common_infra + ) + 
report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor) + print(report_dict) ### This is a full RL strategy ### @@ -527,4 +574,4 @@ def _to_float32(val): return np.array(val, dtype=np.float32) if __name__ == '__main__': - _main_tianshou() + fire.Fire(RlWorkflow) From 1581ef12accdb32f41a5272c189105184992abd6 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Fri, 4 Jun 2021 13:01:49 +0800 Subject: [PATCH 12/28] Update impl for robustness --- .../nested_decision_execution/rl_dummy.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py index 4a8f50ad0..cd0961f66 100644 --- a/examples/nested_decision_execution/rl_dummy.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -152,8 +152,13 @@ class EpisodicState: state.cur_time, _ = calendar.get_step_time(state.cur_step) return state - def update(self, execute_result: List[Order], calendar: TradeCalendarManager, done: Optional[bool] = None) -> "StepState": - exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) + def update(self, execute_result: List[Order], calendar: TradeCalendarManager, + done: Optional[bool] = None, length: Optional[int] = None) -> "StepState": + if length is not None: + exec_vol = np.zeros(length) + exec_vol[:len(execute_result)] = np.array([order.deal_amount for order, _, __, ___ in execute_result]) + else: + exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) # Synchronous exec_vol to executor and synchronous back to EpisodicState cur_tick = self.cur_tick ticks_this_step = len(exec_vol) @@ -300,8 +305,6 @@ class SingleOrderEnv(gym.Env): class RLStrategy(BaseStrategy): """When inference and do the backtest from end to end, use this strategy.""" - # TODO This strategy is still for code demo purpose only. - # It has not been end-to-end tested. 
def __init__( self, @@ -315,12 +318,15 @@ class RLStrategy(BaseStrategy): self.action = action self.policy = policy + # TODO: how to get inner frequency and trade len + self.inner_frequency = "day" + self.inner_trade_len = 1 + def reset(self, outer_trade_decision: List[Order] = None, **kwargs): super().reset(outer_trade_decision=outer_trade_decision, **kwargs) if outer_trade_decision is not None: self.states = OrderedDict() # explicitly make it ordered for order in outer_trade_decision: - # TODO: how to get inner frequency state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day") self.states[order.stock_id, order.direction] = state @@ -331,7 +337,7 @@ class RLStrategy(BaseStrategy): for e in execute_result: orders[e[0].stock_id, e[0].direction].append(e) for (stock_id, direction), state in self.states.items(): - state.update(orders[stock_id, direction], self.trade_calendar) + state.update(orders[stock_id, direction], self.trade_calendar, length=self.inner_trade_len) if not self.states: return [] @@ -495,19 +501,21 @@ class Observation: return spaces.Dict(space) def observe(self, ep_state: EpisodicState) -> Any: + features = D.features( + [ep_state.stock_id], + ['$open', '$close', '$high', '$low', '$volume'], + start_time=ep_state.start_time, + end_time=ep_state.end_time, + freq=self.time_per_step + ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy() + features = np.nan_to_num(features) return { 'direction': _to_int32(ep_state.direction), 'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)), 'num_step': _to_int32(ep_state.num_step), 'target': _to_float32(ep_state.target), 'position': _to_float32(ep_state.position), - 'features': D.features( - [ep_state.stock_id], - ['$open', '$close', '$high', '$low', '$volume'], - start_time=ep_state.start_time, - end_time=ep_state.end_time, - freq=self.time_per_step - ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy(), + 'features': features, } From 
76be5d50e50904d1eb712ca91c57d76dcf3d9b1d Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Mon, 7 Jun 2021 10:56:12 +0800 Subject: [PATCH 13/28] Refine example --- examples/nested_decision_execution/rl_dummy.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py index cd0961f66..c42e28be4 100644 --- a/examples/nested_decision_execution/rl_dummy.py +++ b/examples/nested_decision_execution/rl_dummy.py @@ -319,6 +319,7 @@ class RLStrategy(BaseStrategy): self.policy = policy # TODO: how to get inner frequency and trade len + # This should be no longer required when PA is provided by qlib. self.inner_frequency = "day" self.inner_trade_len = 1 @@ -432,6 +433,12 @@ class RlWorkflow(NestedDecisonExecutionWorkflow): min_cost=5 ) common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) + executor = NestedExecutor( + time_per_step="week", + inner_executor=SimulatorExecutor(time_per_step="day", verbose=True), + inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()), + common_infra=common_infra + ) strategy = init_instance_by_config({ "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.model_strategy", @@ -442,12 +449,6 @@ class RlWorkflow(NestedDecisonExecutionWorkflow): "n_drop": 5, }, }, common_infra=common_infra) - executor = NestedExecutor( - time_per_step="week", - inner_executor=SimulatorExecutor(time_per_step="day", verbose=True), - inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()), - common_infra=common_infra - ) report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor) print(report_dict) @@ -463,7 +464,7 @@ class QlibOrderDataset(Dataset): def __len__(self): return len(self.orders) - def __getitem__(self, index): + def __getitem__(self, index) -> Order: return self.orders[index] @@ -535,7 +536,7 @@ class Action: def validate(self, 
action: Any) -> bool: return self.action_space.contains(action) - def to_volume(self, action: Any, ep_state: EpisodicState): + def to_volume(self, action: Any, ep_state: EpisodicState) -> Any: exec_vol = ep_state.position / self.denominator * action if ep_state.cur_step + 1 >= ep_state.num_step: exec_vol = ep_state.position From 7525854beda2c0c0303b265c97b52c994561221c Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 22 Jun 2021 03:47:39 +0000 Subject: [PATCH 14/28] Add shortcut in init --- qlib/backtest/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index 91eedd736..edfc907cd 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -8,7 +8,7 @@ from .backtest import backtest_loop from .backtest import collect_data_loop from .order import Order -from .utils import CommonInfrastructure +from .utils import CommonInfrastructure, TradeCalendarManager from ..strategy.base import BaseStrategy from ..utils import init_instance_by_config from ..log import get_module_logger From 583fbbef3ce714bdc4b3130b74620f79873119bb Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 22 Jun 2021 07:07:19 +0000 Subject: [PATCH 15/28] Resolve init conflict --- qlib/backtest/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index 107f97782..ae07cdbdf 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -7,15 +7,9 @@ from .exchange import Exchange from .executor import BaseExecutor from .backtest import backtest_loop from .backtest import collect_data_loop -<<<<<<< HEAD from .order import Order from .utils import CommonInfrastructure, TradeCalendarManager -======= -from .utils import CommonInfrastructure -from .order import Order - ->>>>>>> ab97e8248443789ce1e0f90a9b5596e5fee60566 from ..strategy.base import BaseStrategy from ..utils import init_instance_by_config from ..log import 
get_module_logger From 2b4a493617d759d28f49768310c43c99daa169f9 Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Thu, 1 Jul 2021 09:41:08 +0000 Subject: [PATCH 16/28] Order patch --- qlib/backtest/order.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index b013d8723..32c4121fc 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -12,6 +12,7 @@ if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.utils import TradeCalendarManager import warnings +import numpy as np import pandas as pd from dataclasses import dataclass, field from typing import ClassVar, Optional, Union, List, Set, Tuple @@ -47,7 +48,7 @@ class Order: direction: int factor: float - deal_amount: float = field(init=False) + deal_amount: Optional[float] = None # FIXME: # for compatible now. @@ -62,11 +63,11 @@ class Order: self.deal_amount = 0 @staticmethod - def parse_dir(direction: Union[str, int, OrderDir]) -> OrderDir: + def parse_dir(direction: Union[str, int, np.integer, OrderDir]) -> OrderDir: if isinstance(direction, OrderDir): return direction - elif isinstance(direction, int): - return OrderDir(direction) + elif isinstance(direction, (int, float, np.integer, np.floating)): + return OrderDir(int(direction)) elif isinstance(direction, str): dl = direction.lower() if dl.strip() == "sell": From 7048bef7c69e3a3e56bbf8ffb34b85eac490c192 Mon Sep 17 00:00:00 2001 From: Young Date: Sun, 4 Jul 2021 06:41:34 +0000 Subject: [PATCH 17/28] fix ffr and order amount --- qlib/backtest/account.py | 7 +++++-- qlib/backtest/executor.py | 2 ++ qlib/backtest/order.py | 31 +++++++++++++++++++++++++++++-- qlib/backtest/report.py | 29 +++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 6167ee407..0d89dde87 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -9,7 +9,7 @@ import 
pandas as pd from .position import BasePosition, InfPosition, Position from .report import Report, Indicator -from .order import Order +from .order import BaseTradeDecision, Order from .exchange import Exchange """ @@ -226,6 +226,7 @@ class Account: trade_end_time: pd.Timestamp, trade_exchange: Exchange, atomic: bool, + outer_trade_decision: BaseTradeDecision, generate_report: bool = False, trade_info: list = None, inner_order_indicators: Indicator = None, @@ -276,7 +277,9 @@ class Account: if atomic: self.indicator.update_order_indicators(trade_start_time, trade_end_time, trade_info, trade_exchange) else: - self.indicator.agg_order_indicators(inner_order_indicators, indicator_config) + self.indicator.agg_order_indicators( + inner_order_indicators, indicator_config=indicator_config, outer_trade_decision=outer_trade_decision + ) self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config) self.indicator.record(trade_start_time) diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 3f7b2f4ed..7341e5225 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -299,6 +299,7 @@ class NestedExecutor(BaseExecutor): trade_end_time, self.trade_exchange, atomic=False, + outer_trade_decision=trade_decision, generate_report=self.generate_report, inner_order_indicators=inner_order_indicators, indicator_config=self.indicator_config, @@ -409,6 +410,7 @@ class SimulatorExecutor(BaseExecutor): trade_end_time, self.trade_exchange, atomic=True, + outer_trade_decision=trade_decision, generate_report=self.generate_report, trade_info=execute_result, indicator_config=self.indicator_config, diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index 32c4121fc..64ff2a56f 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -40,7 +40,7 @@ class Order: """ stock_id: str - amount: float + amount: float # `amount` is a non-negative value # The interval of the order which belongs to (NOTE: this is not the expected 
order dealing range time) start_time: pd.Timestamp @@ -48,7 +48,7 @@ class Order: direction: int factor: float - deal_amount: Optional[float] = None + deal_amount: Optional[float] = None # `deal_amount` is a non-negative value # FIXME: # for compatible now. @@ -62,6 +62,33 @@ class Order: raise NotImplementedError("direction not supported, `Order.SELL` for sell, `Order.BUY` for buy") self.deal_amount = 0 + @property + def amount_delta(self) -> float: + """ + return the delta of amount. + - Positive value indicates buying `amount` of share + - Negative value indicates selling `amount` of share + """ + return self.amount * self.sign + + @property + def deal_amount_delta(self) -> float: + """ + return the delta of deal_amount. + - Positive value indicates buying `deal_amount` of share + - Negative value indicates selling `deal_amount` of share + """ + return self.deal_amount * self.sign + + @property + def sign(self) -> float: + """ + return the sign of trading + - `+1` indicates buying + - `-1` value indicates selling + """ + return self.direction * 2 - 1 + @staticmethod def parse_dir(direction: Union[str, int, np.integer, OrderDir]) -> OrderDir: if isinstance(direction, OrderDir): diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index f217ea169..4f645c564 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -4,6 +4,8 @@ from collections import OrderedDict from logging import warning +from typing import List +from qlib.backtest.order import BaseTradeDecision, Order import pandas as pd import pathlib import warnings @@ -241,13 +243,13 @@ class Indicator: trade_cost = dict() for order, _trade_val, _trade_cost, _trade_price in trade_info: - amount[order.stock_id] = order.amount * (order.direction * 2 - 1) - deal_amount[order.stock_id] = order.deal_amount * (order.direction * 2 - 1) + amount[order.stock_id] = order.amount_delta + deal_amount[order.stock_id] = order.deal_amount_delta trade_price[order.stock_id] = _trade_price - 
trade_value[order.stock_id] = _trade_val * (order.direction * 2 - 1) + trade_value[order.stock_id] = _trade_val * order.sign trade_cost[order.stock_id] = _trade_cost - self.order_indicator["amount"] = pd.Series(amount) + self.order_indicator["amount"] = self.order_indicator["inner_amount"] = pd.Series(amount) self.order_indicator["deal_amount"] = pd.Series(deal_amount) self.order_indicator["trade_price"] = pd.Series(trade_price) self.order_indicator["trade_value"] = pd.Series(trade_value) @@ -271,13 +273,13 @@ class Indicator: ) / self.order_indicator["base_price"] def _agg_order_trade_info(self, inner_order_indicators): - amount = pd.Series() + inner_amount = pd.Series() deal_amount = pd.Series() trade_price = pd.Series() trade_value = pd.Series() trade_cost = pd.Series() for _order_indicator in inner_order_indicators: - amount = amount.add(_order_indicator["amount"], fill_value=0) + inner_amount = inner_amount.add(_order_indicator["inner_amount"], fill_value=0) deal_amount = deal_amount.add(_order_indicator["deal_amount"], fill_value=0) trade_price = trade_price.add( _order_indicator["trade_price"] * _order_indicator["deal_amount"], fill_value=0 @@ -285,13 +287,21 @@ class Indicator: trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0) trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0) - self.order_indicator["amount"] = amount + self.order_indicator["inner_amount"] = inner_amount self.order_indicator["deal_amount"] = deal_amount trade_price /= self.order_indicator["deal_amount"] self.order_indicator["trade_price"] = trade_price self.order_indicator["trade_value"] = trade_value self.order_indicator["trade_cost"] = trade_cost + def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision): + # NOTE: these indicator is designed for order execution, so the + decision: List[Order] = outer_trade_decision.get_decision() + if decision is None: + self.order_indicator["amount"] = pd.Series() + else: + 
self.order_indicator["amount"] = pd.Series({order.stock_id: order.amount_delta for order in decision}) + def _agg_order_fulfill_rate(self): self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] @@ -367,8 +377,11 @@ class Indicator: self._update_order_fulfill_rate() self._update_order_price_advantage(trade_exchange, trade_start_time, trade_end_time) - def agg_order_indicators(self, inner_order_indicators, indicator_config={}): + def agg_order_indicators( + self, inner_order_indicators, outer_trade_decision: BaseTradeDecision, indicator_config={} + ): self._agg_order_trade_info(inner_order_indicators) + self._update_trade_amount(outer_trade_decision) self._agg_order_fulfill_rate() pa_config = indicator_config.get("pa_config", {}) self._agg_order_price_advantage(inner_order_indicators, base_price=pa_config.get("base_price", "twap")) From 82645233e7cf4efcc9cfecfa3bdc3bf67c10b237 Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 6 Jul 2021 03:50:34 +0000 Subject: [PATCH 18/28] Support order dataframe --- qlib/contrib/strategy/rule_strategy.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index d18eb2a27..8152b13de 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -714,12 +714,12 @@ class FileOrderStrategy(BaseStrategy): - This class provides an interface for user to read orders from csv files. 
""" - def __init__(self, file: Union[IO, str, Path], index_range: Tuple[int, int] = None, *args, **kwargs): + def __init__(self, file: Union[IO, str, Path, pd.DataFrame], index_range: Tuple[int, int] = None, *args, **kwargs): """ Parameters ---------- - file : Union[IO, str, Path] + file : Union[IO, str, Path, pd.DataFrame] this parameters will specify the info of expected orders Here is an example of the content @@ -741,8 +741,11 @@ class FileOrderStrategy(BaseStrategy): """ super().__init__(*args, **kwargs) - with get_io_object(file) as f: - self.order_df = pd.read_csv(f, dtype={"datetime": np.str}) + if isinstance(file, pd.DataFrame): + self.order_df = file + else: + with get_io_object(file) as f: + self.order_df = pd.read_csv(f, dtype={"datetime": np.str}) self.order_df["datetime"] = self.order_df["datetime"].apply(pd.Timestamp) self.order_df = self.order_df.set_index(["datetime", "instrument"]) From 354f7e68c2f9065971887c9c35b278215873ba7a Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 6 Jul 2021 08:47:55 +0000 Subject: [PATCH 19/28] Constrain TWAP trade step --- qlib/contrib/strategy/rule_strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 8152b13de..3ca325bf6 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -108,8 +108,8 @@ class TWAPStrategy(BaseStrategy): start_idx, end_idx = get_start_end_idx(self, self.outer_trade_decision) trade_len = end_idx - start_idx + 1 - if trade_step < start_idx: - # It is not time to start trading + if trade_step < start_idx or trade_step > end_idx: + # It is not time to start trading or trading has ended. 
return TradeDecisionWO(order_list=[], strategy=self) rel_trade_step = trade_step - start_idx # trade_step relative to start_idx From dd8231edebff2dc8108ce28450f507a14263f434 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 6 Jul 2021 11:09:25 +0000 Subject: [PATCH 20/28] simplify the portfolio-based report --- qlib/backtest/account.py | 52 ++++++++++++++++++++++++++++++--------- qlib/backtest/backtest.py | 8 +++--- qlib/backtest/executor.py | 44 +++++++++++---------------------- qlib/backtest/order.py | 1 - qlib/strategy/base.py | 2 +- 5 files changed, 61 insertions(+), 46 deletions(-) diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 0d89dde87..b394d5823 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -64,34 +64,49 @@ class AccumulatedInfo: class Account: def __init__( - self, init_cash: float = 1e9, freq: str = "day", benchmark_config: dict = {}, pos_type: str = "Position" + self, + init_cash: float = 1e9, + freq: str = "day", + benchmark_config: dict = {}, + pos_type: str = "Position", + port_metr_enabled: bool = True, ): - self.pos_type = pos_type + self._pos_type = pos_type + self._port_metr_enabled = port_metr_enabled self.init_vars(init_cash, freq, benchmark_config) + def is_port_metr_enabled(self): + """ + Is portfolio-based metrics enabled. 
+ """ + return self._port_metr_enabled and not self.current.skip_update() + def init_vars(self, init_cash, freq: str, benchmark_config: dict): # init cash self.init_cash = init_cash self.current: BasePosition = init_instance_by_config( { - "class": self.pos_type, + "class": self._pos_type, "kwargs": {"cash": init_cash}, "module_path": "qlib.backtest.position", } ) self.accum_info = AccumulatedInfo() + self.report = None + self.positions = {} self.reset(freq=freq, benchmark_config=benchmark_config, init_report=True) def reset_report(self, freq, benchmark_config): # portfolio related metrics - self.report = Report(freq, benchmark_config) - self.positions = {} + if self.is_port_metr_enabled(): + self.report = Report(freq, benchmark_config) + self.positions = {} # trading related matric(e.g. high-frequency trading) self.indicator = Indicator() - def reset(self, freq=None, benchmark_config=None, init_report=False): + def reset(self, freq=None, benchmark_config=None, init_report=False, port_metr_enabled: bool = None): """reset freq and report of account Parameters @@ -108,6 +123,9 @@ class Account: if benchmark_config is not None: self.benchmark_config = benchmark_config + if port_metr_enabled is not None: + self._port_metr_enabled = port_metr_enabled + if freq is not None or benchmark_config is not None or init_report: self.reset_report(self.freq, self.benchmark_config) @@ -137,7 +155,7 @@ class Account: self.accum_info.add_return_value(profit) # note here do not consider cost def update_order(self, order, trade_val, cost, trade_price): - if self.current.skip_update(): + if not self.is_port_metr_enabled(): # TODO: supporting polymorphism for account # updating order for infinite position is meaningless return @@ -160,12 +178,14 @@ class Account: def update_bar_count(self): """at the end of the trading bar, update holding bar, count of stock""" # update holding day count + # NOTE: updating bar_count does not only serve portfolio metrics, it also serve the strategy if not 
self.current.skip_update(): self.current.add_count_all(bar=self.freq) def update_current(self, trade_start_time, trade_end_time, trade_exchange): """update current to make rtn consistent with earning at the end of bar""" # update price for stock in the position and the profit from changed_price + # NOTE: updating position does not only serve portfolio metrics, it also serve the strategy if not self.current.skip_update(): stock_list = self.current.get_stock_list() for code in stock_list: @@ -227,7 +247,6 @@ class Account: trade_exchange: Exchange, atomic: bool, outer_trade_decision: BaseTradeDecision, - generate_report: bool = False, trade_info: list = None, inner_order_indicators: Indicator = None, indicator_config: dict = {}, @@ -246,8 +265,6 @@ class Account: whether the trading executor is atomic, which means there is no higher-frequency trading executor inside it - if atomic is True, calculate the indicators with trade_info - else, aggregate indicators with inner indicators - generate_report : bool, optional - whether to generate report, by default False trade_info : List[(Order, float, float, float)], optional trading information, by default None - necessary if atomic is True @@ -267,7 +284,7 @@ class Account: # TODO: `update_bar_count` and `update_current` should placed in Position and be merged. 
self.update_bar_count() self.update_current(trade_start_time, trade_end_time, trade_exchange) - if generate_report: + if self.is_port_metr_enabled(): # report is portfolio related analysis self.update_report(trade_start_time, trade_end_time) @@ -283,3 +300,16 @@ class Account: self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config) self.indicator.record(trade_start_time) + + def get_report(self): + """get the history report and postions instance""" + if self.is_port_metr_enabled(): + _report = self.report.generate_report_dataframe() + _positions = self.get_positions() + return _report, _positions + else: + raise ValueError("generate_report should be True if you want to generate report") + + def get_trade_indicator(self) -> Indicator: + """get the trade indicator instance, which has pa/pos/ffr info.""" + return self.indicator diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py index 48d06db6c..573c874b0 100644 --- a/qlib/backtest/backtest.py +++ b/qlib/backtest/backtest.py @@ -69,13 +69,13 @@ def collect_data_loop( all_executors = trade_executor.get_all_executors() all_reports = { - "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.get_report() + "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.trade_account.get_report() for _executor in all_executors - if _executor.generate_report + if _executor.trade_account.is_port_metr_enabled() } all_indicators = {} for _executor in all_executors: key = "{}{}".format(*Freq.parse(_executor.time_per_step)) - all_indicators[key] = _executor.get_trade_indicator().generate_trade_indicators_dataframe() - all_indicators[key + "_obj"] = _executor.get_trade_indicator() + all_indicators[key] = _executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe() + all_indicators[key + "_obj"] = _executor.trade_account.get_trade_indicator() return_value.update({"report": all_reports, "indicator": all_indicators}) diff --git a/qlib/backtest/executor.py 
b/qlib/backtest/executor.py index 14d97e825..adea9dde0 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -103,8 +103,10 @@ class BaseExecutor: self.common_infra.update(common_infra) if common_infra.has("trade_account"): + # NOTE: there is a trick in the code. + # copy is used instead of deepcopy. So positions are shared self.trade_account = copy.copy(common_infra.get("trade_account")) - self.trade_account.reset(freq=self.time_per_step, init_report=True) + self.trade_account.reset(freq=self.time_per_step, init_report=True, port_metr_enabled=self.generate_report) def reset(self, track_data: bool = None, common_infra: CommonInfrastructure = None, **kwargs): """ @@ -167,19 +169,6 @@ class BaseExecutor: yield trade_decision return self.execute(trade_decision) - def get_report(self): - """get the history report and postions instance""" - if self.generate_report: - _report = self.trade_account.report.generate_report_dataframe() - _positions = self.trade_account.get_positions() - return _report, _positions - else: - raise ValueError("generate_report should be True if you want to generate report") - - def get_trade_indicator(self) -> Indicator: - """get the trade indicator instance, which has pa/pos/ffr info.""" - return self.trade_account.indicator - def get_all_executors(self): """get all executors""" return [self] @@ -289,21 +278,19 @@ class NestedExecutor(BaseExecutor): _inner_execute_result = yield from self.inner_executor.collect_data(trade_decision=_inner_trade_decision) execute_result.extend(_inner_execute_result) - inner_order_indicators.append(self.inner_executor.get_trade_indicator().get_order_indicator()) + inner_order_indicators.append(self.inner_executor.trade_account.get_trade_indicator().get_order_indicator()) - if hasattr(self, "trade_account"): - trade_step = self.trade_calendar.get_trade_step() - trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) - self.trade_account.update_bar_end( - trade_start_time, 
- trade_end_time, - self.trade_exchange, - atomic=False, - outer_trade_decision=trade_decision, - generate_report=self.generate_report, - inner_order_indicators=inner_order_indicators, - indicator_config=self.indicator_config, - ) + trade_step = self.trade_calendar.get_trade_step() + trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) + self.trade_account.update_bar_end( + trade_start_time, + trade_end_time, + self.trade_exchange, + atomic=False, + outer_trade_decision=trade_decision, + inner_order_indicators=inner_order_indicators, + indicator_config=self.indicator_config, + ) self.trade_calendar.step() if return_value is not None: @@ -457,7 +444,6 @@ class SimulatorExecutor(BaseExecutor): self.trade_exchange, atomic=True, outer_trade_decision=trade_decision, - generate_report=self.generate_report, trade_info=execute_result, indicator_config=self.indicator_config, ) diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index 64ff2a56f..535309d91 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -56,7 +56,6 @@ class Order: SELL: ClassVar[OrderDir] = OrderDir.SELL BUY: ClassVar[OrderDir] = OrderDir.BUY - def __post_init__(self): if self.direction not in {Order.SELL, Order.BUY}: raise NotImplementedError("direction not supported, `Order.SELL` for sell, `Order.BUY` for buy") diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index bac59acfb..a787c098f 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -10,7 +10,7 @@ from ..utils import init_instance_by_config from ..backtest.utils import CommonInfrastructure, LevelInfrastructure, TradeCalendarManager from ..backtest.order import BaseTradeDecision -__all__ = ['BaseStrategy', 'ModelStrategy', 'RLStrategy', 'RLIntStrategy'] +__all__ = ["BaseStrategy", "ModelStrategy", "RLStrategy", "RLIntStrategy"] class BaseStrategy: From 6fd50a5bfa3a20d153bd6b86ec8305a725bef228 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 6 Jul 2021 12:08:53 +0000 Subject: 
[PATCH 21/28] Supporting skip empty decisions --- qlib/backtest/executor.py | 44 ++++++++++++++++++++++++++------------- qlib/backtest/order.py | 5 ++++- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index adea9dde0..c4807ebde 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -191,6 +191,7 @@ class NestedExecutor(BaseExecutor): generate_report: bool = False, verbose: bool = False, track_data: bool = False, + skip_empty_decision: bool = True, trade_exchange: Exchange = None, common_infra: CommonInfrastructure = None, **kwargs, @@ -206,6 +207,11 @@ class NestedExecutor(BaseExecutor): exchange that provides market info, used to generate report - If generate_report is None, trade_exchange will be ignored - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra + skip_empty_decision: bool + Will the executor skip the inner loop when the decision is empty. + It should be False in following cases + - The decisions may be updated by steps + - The inner executor may not follow the decisions from the outer strategy """ self.inner_executor = init_instance_by_config( inner_executor, common_infra=common_infra, accept_types=BaseExecutor @@ -214,6 +220,8 @@ class NestedExecutor(BaseExecutor): inner_strategy, common_infra=common_infra, accept_types=BaseStrategy ) + self._skip_empty_decision = skip_empty_decision + super(NestedExecutor, self).__init__( time_per_step=time_per_step, start_time=start_time, @@ -259,26 +267,32 @@ class NestedExecutor(BaseExecutor): def collect_data(self, trade_decision: BaseTradeDecision, return_value=None): if self.track_data: yield trade_decision - self._init_sub_trading(trade_decision) execute_result = [] inner_order_indicators = [] - _inner_execute_result = None - while not self.inner_executor.finished(): - # outter strategy have chance to update decision each iterator - updated_trade_decision = 
trade_decision.update(self.inner_executor.trade_calendar) - if updated_trade_decision is not None: - trade_decision = updated_trade_decision - # NEW UPDATE - # create a hook for inner strategy to update outter decision - self.inner_strategy.alter_outer_trade_decision(trade_decision) - _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result) + if not (trade_decision.empty() and self._skip_empty_decision): + _inner_execute_result = None + self._init_sub_trading(trade_decision) + while not self.inner_executor.finished(): + # outter strategy have chance to update decision each iterator + updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar) + if updated_trade_decision is not None: + trade_decision = updated_trade_decision + # NEW UPDATE + # create a hook for inner strategy to update outter decision + self.inner_strategy.alter_outer_trade_decision(trade_decision) - # NOTE: Trade Calendar will step forward in the follow line - _inner_execute_result = yield from self.inner_executor.collect_data(trade_decision=_inner_trade_decision) + _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result) - execute_result.extend(_inner_execute_result) - inner_order_indicators.append(self.inner_executor.trade_account.get_trade_indicator().get_order_indicator()) + # NOTE: Trade Calendar will step forward in the follow line + _inner_execute_result = yield from self.inner_executor.collect_data( + trade_decision=_inner_trade_decision + ) + + execute_result.extend(_inner_execute_result) + inner_order_indicators.append( + self.inner_executor.trade_account.get_trade_indicator().get_order_indicator() + ) trade_step = self.trade_calendar.get_trade_step() trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index 535309d91..1953426fd 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -197,7 +197,7 
@@ class BaseTradeDecision: Example: []: Decision not available - concrete_decision: + [concrete_decision]: available """ raise NotImplementedError(f"This type of input is not supported") @@ -236,6 +236,9 @@ class BaseTradeDecision: """ raise NotImplementedError(f"Please implement the `func` method") + def empty(self) -> bool: + return len(self.get_decision()) == 0 + class TradeDecisionWO(BaseTradeDecision): """ From 32ae6e42597bb3f64523d42255c116bcbc1524ab Mon Sep 17 00:00:00 2001 From: Young Date: Thu, 8 Jul 2021 05:54:36 +0000 Subject: [PATCH 22/28] fix calculating base_price --- qlib/backtest/account.py | 12 ++- qlib/backtest/exchange.py | 20 ++--- qlib/backtest/order.py | 5 +- qlib/backtest/report.py | 151 +++++++++++++++++++++++++++----------- 4 files changed, 130 insertions(+), 58 deletions(-) diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index b394d5823..67f7b056a 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -3,6 +3,7 @@ import copy +from typing import Dict, List from qlib.utils import init_instance_by_config import warnings import pandas as pd @@ -248,7 +249,7 @@ class Account: atomic: bool, outer_trade_decision: BaseTradeDecision, trade_info: list = None, - inner_order_indicators: Indicator = None, + inner_order_indicators: List[Dict[str, pd.Series]] = None, indicator_config: dict = {}, ): """update account at each trading bar step @@ -292,10 +293,15 @@ class Account: self.indicator.clear() if atomic: - self.indicator.update_order_indicators(trade_start_time, trade_end_time, trade_info, trade_exchange) + self.indicator.update_order_indicators(trade_info) else: self.indicator.agg_order_indicators( - inner_order_indicators, indicator_config=indicator_config, outer_trade_decision=outer_trade_decision + trade_start_time, + trade_end_time, + inner_order_indicators, + outer_trade_decision=outer_trade_decision, + trade_exchange=trade_exchange, + indicator_config=indicator_config, ) 
self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config) diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index 26fae378f..3794651dc 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -281,27 +281,27 @@ class Exchange: return trade_val, trade_cost, trade_price - def get_quote_info(self, stock_id, start_time, end_time): - return resam_ts_data(self.quote[stock_id], start_time, end_time, method=ts_data_last) + def get_quote_info(self, stock_id, start_time, end_time, method=ts_data_last): + return resam_ts_data(self.quote[stock_id], start_time, end_time, method=method) - def get_close(self, stock_id, start_time, end_time): - return resam_ts_data(self.quote[stock_id]["$close"], start_time, end_time, method=ts_data_last) + def get_close(self, stock_id, start_time, end_time, method=ts_data_last): + return resam_ts_data(self.quote[stock_id]["$close"], start_time, end_time, method=method) - def get_volume(self, stock_id, start_time, end_time): - return resam_ts_data(self.quote[stock_id]["$volume"], start_time, end_time, method="sum") + def get_volume(self, stock_id, start_time, end_time, method="sum"): + return resam_ts_data(self.quote[stock_id]["$volume"], start_time, end_time, method=method) - def get_deal_price(self, stock_id, start_time, end_time, direction: OrderDir): + def get_deal_price(self, stock_id, start_time, end_time, direction: OrderDir, method=ts_data_last): if direction == OrderDir.SELL: pstr = self.sell_price elif direction == OrderDir.BUY: pstr = self.buy_price else: raise NotImplementedError(f"This type of input is not supported") - deal_price = resam_ts_data(self.quote[stock_id][pstr], start_time, end_time, method=ts_data_last) - if np.isclose(deal_price, 0.0) or np.isnan(deal_price): + deal_price = resam_ts_data(self.quote[stock_id][pstr], start_time, end_time, method=method) + if method is not None and (np.isclose(deal_price, 0.0) or np.isnan(deal_price)): 
self.logger.warning(f"(stock_id:{stock_id}, trade_time:{(start_time, end_time)}, {pstr}): {deal_price}!!!") self.logger.warning(f"setting deal_price to close price") - deal_price = self.get_close(stock_id, start_time, end_time) + deal_price = self.get_close(stock_id, start_time, end_time, method) return deal_price def get_factor(self, stock_id, start_time, end_time) -> Union[float, None]: diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index 1953426fd..20c97aa90 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -93,7 +93,10 @@ class Order: if isinstance(direction, OrderDir): return direction elif isinstance(direction, (int, float, np.integer, np.floating)): - return OrderDir(int(direction)) + if direction > 0: + return Order.BUY + else: + return Order.SELL elif isinstance(direction, str): dl = direction.lower() if dl.strip() == "sell": diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index ce2812bd0..43a6a455b 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -4,9 +4,11 @@ from collections import OrderedDict from logging import warning -from typing import List -from qlib.backtest.order import BaseTradeDecision, Order +from qlib.backtest.exchange import Exchange +from typing import Dict, List +from qlib.backtest.order import BaseTradeDecision, Order, OrderDir import pandas as pd +import numpy as np import pathlib import warnings from pandas.core import groupby @@ -221,6 +223,33 @@ class Report: class Indicator: + """ + `Indicator` is implemented in a aggregate way. + All the metrics are calculated aggregately. + All the metrics are calculated for a seperated stock and in a specific step on a specific level. + + | indicator | desc. 
| + |--------------+--------------------------------------------------------------| + | amount | the *target* amount given by the outer strategy | + | inner_amount | the total *target* amount of inner strategy | + | trade_price | the average deal price | + | trade_value | the total trade value | + | trade_cost | the total trade cost (base price need drection) | + | trade_dir | the trading direction | + | ffr | full fill rate | + | pa | price advantage | + | pos | win rate | + | base_price | the price of baseline | + | base_volume | the volume of baseline (for weighted aggregating base_price) | + + **NOTE**: + The `base_price` and `base_volume` can't be NaN when there are not trading on that step. Otherwise + aggregating get wrong results. + + So `base_price` will not be calculated in a aggregate way!! + + """ + def __init__(self): self.order_indicator_his = OrderedDict() self.order_indicator = OrderedDict() @@ -241,6 +270,7 @@ class Indicator: trade_price = dict() trade_value = dict() trade_cost = dict() + trade_dir = dict() for order, _trade_val, _trade_cost, _trade_price in trade_info: amount[order.stock_id] = order.amount_delta @@ -248,36 +278,32 @@ class Indicator: trade_price[order.stock_id] = _trade_price trade_value[order.stock_id] = _trade_val * order.sign trade_cost[order.stock_id] = _trade_cost + trade_dir[order.stock_id] = order.direction self.order_indicator["amount"] = self.order_indicator["inner_amount"] = pd.Series(amount) self.order_indicator["deal_amount"] = pd.Series(deal_amount) + # NOTE: trade_price and baseline price will be same on the lowest-level self.order_indicator["trade_price"] = pd.Series(trade_price) self.order_indicator["trade_value"] = pd.Series(trade_value) self.order_indicator["trade_cost"] = pd.Series(trade_cost) + self.order_indicator["trade_dir"] = pd.Series(trade_dir) def _update_order_fulfill_rate(self): self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] - def 
_update_order_price_advantage(self, trade_exchange, trade_start_time, trade_end_time): - self.order_indicator["base_price"] = self.order_indicator["trade_price"] - instruments = list(self.order_indicator["base_price"].index) - self.order_indicator["volume"] = pd.Series( - [ - trade_exchange.get_volume(stock_id=inst, start_time=trade_start_time, end_time=trade_end_time) - for inst in instruments - ], - index=instruments, - ) - self.order_indicator["pa"] = ( - self.order_indicator["trade_price"] - self.order_indicator["base_price"] - ) / self.order_indicator["base_price"] + def _update_order_price_advantage(self): + # NOTE: + # trade_price and baseline price will be same on the lowest-level + # So Pa should be 0 + self.order_indicator["pa"] = 0 - def _agg_order_trade_info(self, inner_order_indicators): + def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]): inner_amount = pd.Series() deal_amount = pd.Series() trade_price = pd.Series() trade_value = pd.Series() trade_cost = pd.Series() + trade_dir = pd.Series() for _order_indicator in inner_order_indicators: inner_amount = inner_amount.add(_order_indicator["inner_amount"], fill_value=0) deal_amount = deal_amount.add(_order_indicator["deal_amount"], fill_value=0) @@ -286,6 +312,9 @@ class Indicator: ) trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0) trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0) + trade_dir = trade_dir.add(_order_indicator["trade_dir"]) + + trade_dir = trade_dir.apply(Order.parse_dir) self.order_indicator["inner_amount"] = inner_amount self.order_indicator["deal_amount"] = deal_amount @@ -293,6 +322,7 @@ class Indicator: self.order_indicator["trade_price"] = trade_price self.order_indicator["trade_value"] = trade_value self.order_indicator["trade_cost"] = trade_cost + self.order_indicator["trade_dir"] = trade_dir def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision): # NOTE: these indicator is 
designed for order execution, so the @@ -305,34 +335,59 @@ class Indicator: def _agg_order_fulfill_rate(self): self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] - def _agg_order_price_advantage(self, inner_order_indicators, base_price="twap"): - base_price = base_price.lower() - volume = pd.Series() - for _order_indicator in inner_order_indicators: - volume = volume.add(_order_indicator["volume"], fill_value=0) - self.order_indicator["volume"] = volume + def _agg_order_price_advantage( + self, + inner_order_indicators: List[Dict[str, pd.Series]], + trade_start_time: pd.Timestamp, + trade_end_time: pd.Timestamp, + trade_exchange: Exchange, + pa_config: dict = {}, + ): + """ - if base_price == "twap": - base_price = pd.Series() - price_count = pd.Series() - for _order_indicator in inner_order_indicators: - base_price = base_price.add(_order_indicator["base_price"], fill_value=0) - price_count = price_count.add(pd.Series(1, index=_order_indicator["base_price"].index), fill_value=0) - base_price /= price_count - self.order_indicator["base_price"] = base_price + Parameters + ---------- + inner_order_indicators : List[Dict[str, pd.Series]] + the indicators of account of inner executor + trade_start_time : pd.Timestamp + the start_time of the trade period, for slicing + trade_end_time : pd.Timestamp + the end_time of the trade period, for slicing (so it may include more time at the end) + trade_exchange : Exchange + for retrieving trading price + pa_config : dict + For example + { + "agg": "twap", # "vwap" + "price": "$close", # TODO: this is not supported now!!!!! 
+ # default to use deal price of the exchange + } + """ - elif base_price == "vwap": - base_price = pd.Series() - for _order_indicator in inner_order_indicators: - base_price = base_price.add(_order_indicator["base_price"] * _order_indicator["volume"], fill_value=0) - base_price /= self.order_indicator["volume"] - self.order_indicator["base_price"] = base_price + agg = pa_config.get("agg", "twap").lower() + price = pa_config.get("price", "deal_price").lower() - else: - raise ValueError(f"base_price {base_price} is not supported!") + base_price = {} + for inst, dir in self.order_indicator["trade_dir"].items(): - self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1 - # print("trade_price", self.order_indicator["trade_price"], "base_price", self.order_indicator["base_price"], "pa", self.order_indicator["pa"]* (2 * (self.order_indicator["amount"] < 0).astype(int) - 1)) + if price == "deal_price": + price_s = trade_exchange.get_deal_price(inst, trade_start_time, trade_end_time, dir, method=None) + else: + raise NotImplementedError(f"This type of input is not supported") + + # there are some zeros in the trading price. 
These cases are known meaningless + price_s = price_s.mask(np.isclose(price_s, 0)) + + if agg == "vwap": + volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None) + base_price[inst] = ((price_s * volume_s).sum() / volume_s.sum()).item() + elif agg == "twap": + base_price[inst] = price_s.mean().item() + + base_price = pd.Series(base_price) + + # update PA + self.order_indicator["pa"] = self.order_indicator["trade_price"] / base_price - 1 def _cal_trade_fulfill_rate(self, method="mean"): if method == "mean": @@ -372,19 +427,27 @@ class Indicator: def _cal_trade_order_count(self): return self.order_indicator["amount"].count() - def update_order_indicators(self, trade_start_time, trade_end_time, trade_info, trade_exchange): + def update_order_indicators(self, trade_info: list): self._update_order_trade_info(trade_info=trade_info) self._update_order_fulfill_rate() - self._update_order_price_advantage(trade_exchange, trade_start_time, trade_end_time) + self._update_order_price_advantage() def agg_order_indicators( - self, inner_order_indicators, outer_trade_decision: BaseTradeDecision, indicator_config={} + self, + trade_start_time, + trade_end_time, + inner_order_indicators: List[Dict[str, pd.Series]], + outer_trade_decision: BaseTradeDecision, + trade_exchange: Exchange, + indicator_config={}, ): self._agg_order_trade_info(inner_order_indicators) self._update_trade_amount(outer_trade_decision) self._agg_order_fulfill_rate() pa_config = indicator_config.get("pa_config", {}) - self._agg_order_price_advantage(inner_order_indicators, base_price=pa_config.get("base_price", "twap")) + self._agg_order_price_advantage( + inner_order_indicators, trade_start_time, trade_end_time, trade_exchange, pa_config=pa_config + ) def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}): show_indicator = indicator_config.get("show_indicator", False) From eada8640b9d8f9e81fad9244c692853a62789c8c Mon Sep 17 00:00:00 2001 From: Young Date: 
Thu, 8 Jul 2021 13:37:20 +0000 Subject: [PATCH 23/28] align range limit --- qlib/backtest/__init__.py | 2 +- qlib/backtest/account.py | 19 +- qlib/backtest/backtest.py | 5 +- qlib/backtest/executor.py | 275 +++++++++++++++---------- qlib/backtest/order.py | 64 ++++-- qlib/backtest/report.py | 200 ++++++++++++------ qlib/backtest/utils.py | 64 +++++- qlib/contrib/strategy/rule_strategy.py | 26 +-- qlib/strategy/base.py | 15 +- 9 files changed, 438 insertions(+), 232 deletions(-) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index fa57e354b..ab3d29408 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -13,7 +13,7 @@ from .executor import BaseExecutor from .backtest import backtest_loop from .backtest import collect_data_loop from .order import Order -from .utils import CommonInfrastructure, TradeCalendarManager +from .utils import CommonInfrastructure, LevelInfrastructure, TradeCalendarManager from ..utils import init_instance_by_config from ..log import get_module_logger from ..config import C diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 67f7b056a..3ef1cdd03 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -3,7 +3,7 @@ import copy -from typing import Dict, List +from typing import Dict, List, Tuple from qlib.utils import init_instance_by_config import warnings import pandas as pd @@ -250,6 +250,7 @@ class Account: outer_trade_decision: BaseTradeDecision, trade_info: list = None, inner_order_indicators: List[Dict[str, pd.Series]] = None, + decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]] = None, indicator_config: dict = {}, ): """update account at each trading bar step @@ -274,6 +275,9 @@ class Account: indicators of inner executor, by default None - necessary if atomic is False - used to aggregate outer indicators + decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]] = None, + The decision list of the inner level: List[Tuple[, , 
]] + The inner level indicator_config : dict, optional config of calculating indicators, by default {} """ @@ -289,22 +293,27 @@ class Account: # report is portfolio related analysis self.update_report(trade_start_time, trade_end_time) - # indicator is trading (e.g. high-frequency order execution) related analysis - self.indicator.clear() + # TODO: will skip empty decisions make it faster? `outer_trade_decision.empty():` + # indicator is trading (e.g. high-frequency order execution) related analysis + self.indicator.reset() + + # aggregate the information for each order if atomic: self.indicator.update_order_indicators(trade_info) else: self.indicator.agg_order_indicators( - trade_start_time, - trade_end_time, inner_order_indicators, + decision_list=decision_list, outer_trade_decision=outer_trade_decision, trade_exchange=trade_exchange, indicator_config=indicator_config, ) + # aggregate all the order metrics a single step self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config) + + # record the metrics self.indicator.record(trade_start_time) def get_report(self): diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py index 573c874b0..89b8c7830 100644 --- a/qlib/backtest/backtest.py +++ b/qlib/backtest/backtest.py @@ -55,14 +55,13 @@ def collect_data_loop( trade decision """ trade_executor.reset(start_time=start_time, end_time=end_time) - level_infra = trade_executor.get_level_infra() - trade_strategy.reset(level_infra=level_infra) + trade_strategy.reset(level_infra=trade_executor.get_level_infra()) with tqdm(total=trade_executor.trade_calendar.get_trade_len(), desc="backtest loop") as bar: _execute_result = None while not trade_executor.finished(): _trade_decision: BaseTradeDecision = trade_strategy.generate_trade_decision(_execute_result) - _execute_result = yield from trade_executor.collect_data(_trade_decision) + _execute_result = yield from trade_executor.collect_data(_trade_decision, level=0) bar.update(1) if return_value 
is not None: diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index c4807ebde..b99380c54 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -1,13 +1,16 @@ +from abc import abstractclassmethod, abstractmethod import copy +from types import GeneratorType +from qlib.backtest.account import Account import warnings import pandas as pd -from typing import List, Union +from typing import List, Tuple, Union from qlib.backtest.report import Indicator -from .order import Order, BaseTradeDecision +from .order import EmptyTradeDecision, Order, BaseTradeDecision from .exchange import Exchange -from .utils import TradeCalendarManager, CommonInfrastructure, LevelInfrastructure +from .utils import TradeCalendarManager, CommonInfrastructure, LevelInfrastructure, get_start_end_idx from ..utils import init_instance_by_config from ..utils.time import Freq @@ -26,6 +29,7 @@ class BaseExecutor: generate_report: bool = False, verbose: bool = False, track_data: bool = False, + trade_exchange: Exchange = None, common_infra: CommonInfrastructure = None, **kwargs, ): @@ -62,8 +66,8 @@ class BaseExecutor: { 'show_indicator': True, 'pa_config': { - 'base_value': 'twap', - 'weight_method': 'value_weighted', + "agg": "twap", # "vwap" + "price": "$close", # default to use deal price of the exchange }, 'ffr_config':{ 'weight_method': 'value_weighted', @@ -77,6 +81,12 @@ class BaseExecutor: whether to generate trade_decision, will be used when training rl agent - If `self.track_data` is true, when making data for training, the input `trade_decision` of `execute` will be generated by `collect_data` - Else, `trade_decision` will not be generated + + trade_exchange : Exchange + exchange that provides market info, used to generate report + - If generate_report is None, trade_exchange will be ignored + - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra + common_infra : CommonInfrastructure, optional: common infrastructure for 
backtesting, may including: - trade_account : Account, optional @@ -90,7 +100,9 @@ class BaseExecutor: self.generate_report = generate_report self.verbose = verbose self.track_data = track_data - self.reset(start_time=start_time, end_time=end_time, track_data=track_data, common_infra=common_infra) + self._trade_exchange = trade_exchange + self.level_infra = LevelInfrastructure() + self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra) def reset_common_infra(self, common_infra): """ @@ -105,60 +117,106 @@ class BaseExecutor: if common_infra.has("trade_account"): # NOTE: there is a trick in the code. # copy is used instead of deepcopy. So positions are shared - self.trade_account = copy.copy(common_infra.get("trade_account")) + self.trade_account: Account = copy.copy(common_infra.get("trade_account")) self.trade_account.reset(freq=self.time_per_step, init_report=True, port_metr_enabled=self.generate_report) - def reset(self, track_data: bool = None, common_infra: CommonInfrastructure = None, **kwargs): + @property + def trade_exchange(self) -> Exchange: + """get trade exchange in a prioritized order""" + return getattr(self, "_trade_exchange", None) or self.common_infra.get("trade_exchange") + + @property + def trade_calendar(self) -> TradeCalendarManager: + """ + Though trade calendar can be accessed from multiple sources, but managing in a centralized way will make the + code easier + """ + return self.level_infra.get("trade_calendar") + + def reset(self, common_infra: CommonInfrastructure = None, **kwargs): """ - reset `start_time` and `end_time`, used in trade calendar - - reset `track_data`, used when making data for multi-level training - reset `common_infra`, used to reset `trade_account`, `trade_exchange`, .etc """ - if track_data is not None: - self.track_data = track_data - if "start_time" in kwargs or "end_time" in kwargs: start_time = kwargs.get("start_time") end_time = kwargs.get("end_time") - self.trade_calendar = 
TradeCalendarManager( - freq=self.time_per_step, start_time=start_time, end_time=end_time - ) - + self.level_infra.reset_cal(freq=self.time_per_step, start_time=start_time, end_time=end_time) if common_infra is not None: self.reset_common_infra(common_infra) def get_level_infra(self): - return LevelInfrastructure(trade_calendar=self.trade_calendar) + return self.level_infra def finished(self): return self.trade_calendar.finished() - def execute(self, trade_decision): + def execute(self, trade_decision: BaseTradeDecision, level: int = 0): """execute the trade decision and return the executed result + NOTE: this function is never used directly in the framework. Should we delete it? + Parameters ---------- trade_decision : BaseTradeDecision + level : int + the level of current executor + Returns ---------- execute_result : List[object] the executed result for trade decision """ - raise NotImplementedError("execute is not implemented!") + return_value = {} + for _decision in self.collect_data(trade_decision, return_value=return_value, level=level): + pass + return return_value.get("execute_result") - def collect_data(self, trade_decision): + @abstractclassmethod + def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0) -> Tuple[List[object], dict]: + """ + Please refer to the doc of collect_data + The only difference between `_collect_data` and `collect_data` is that some common steps are moved into + collect_data + + Parameters + ---------- + Please refer to the doc of collect_data + + + Returns + ------- + Tuple[List[object], dict]: + (, ) + """ + + def collect_data( + self, trade_decision: BaseTradeDecision, return_value: dict = None, level: int = 0 + ) -> List[object]: """Generator for collecting the trade decision data for rl training + his function will make a step forward + Parameters ---------- trade_decision : BaseTradeDecision + level : int + the level of current executor. 
0 indicates the top level + + return_value : dict + the mem address to return the value + e.g. {"return_value": } + Returns ---------- execute_result : List[object] - the executed result for trade decision + the executed result for trade decision. + ** NOTE!!!! **: + 1) This is necessary, The return value of generator will be used in NestedExecutor + 2) Please note the executed results are not merged. Yields ------- @@ -167,7 +225,36 @@ class BaseExecutor: """ if self.track_data: yield trade_decision - return self.execute(trade_decision) + + atomic = not issubclass(self.__class__, NestedExecutor) # issubclass(A, A) is True + + if atomic and trade_decision.get_range_limit(default_value=None) is not None: + raise ValueError("atomic executor doesn't support specify `range_limit`") + + obj = self._collect_data(trade_decision=trade_decision, level=level) + + if isinstance(obj, GeneratorType): + res, kwargs = yield from obj + else: + # Some concrete executor don't have inner decisions + res, kwargs = obj + + trade_start_time, trade_end_time = self.trade_calendar.get_cur_step_time() + # Account will not be changed in this function + self.trade_account.update_bar_end( + trade_start_time, + trade_end_time, + self.trade_exchange, + atomic=atomic, + outer_trade_decision=trade_decision, + indicator_config=self.indicator_config, + **kwargs, + ) + + self.trade_calendar.step() + if return_value is not None: + return_value.update({"execute_result": res}) + return res def get_all_executors(self): """get all executors""" @@ -192,7 +279,7 @@ class NestedExecutor(BaseExecutor): verbose: bool = False, track_data: bool = False, skip_empty_decision: bool = True, - trade_exchange: Exchange = None, + align_range_limit: bool = True, common_infra: CommonInfrastructure = None, **kwargs, ): @@ -203,24 +290,24 @@ class NestedExecutor(BaseExecutor): trading env in each trading bar.
inner_strategy : BaseStrategy trading strategy in each trading bar - trade_exchange : Exchange - exchange that provides market info, used to generate report - - If generate_report is None, trade_exchange will be ignored - - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra skip_empty_decision: bool - Will the executor skip the inner loop when the decision is empty. + Will the executor skip call inner loop when the decision is empty. It should be False in following cases - The decisions may be updated by steps - The inner executor may not follow the decisions from the outer strategy + align_range_limit: bool + force to align the index_range decision + It is only for nested executor, because range_limit is given by outer strategy """ - self.inner_executor = init_instance_by_config( + self.inner_executor: BaseExecutor = init_instance_by_config( inner_executor, common_infra=common_infra, accept_types=BaseExecutor ) - self.inner_strategy = init_instance_by_config( + self.inner_strategy: BaseStrategy = init_instance_by_config( inner_strategy, common_infra=common_infra, accept_types=BaseStrategy ) self._skip_empty_decision = skip_empty_decision + self._align_range_limit = align_range_limit super(NestedExecutor, self).__init__( time_per_step=time_per_step, @@ -234,82 +321,82 @@ class NestedExecutor(BaseExecutor): **kwargs, ) - if trade_exchange is not None: - self.trade_exchange = trade_exchange - def reset_common_infra(self, common_infra): """ reset infrastructure for trading - - reset trade_exchange - reset inner_strategyand inner_executor common infra """ super(NestedExecutor, self).reset_common_infra(common_infra) - if common_infra.has("trade_exchange"): - self.trade_exchange = common_infra.get("trade_exchange") - self.inner_executor.reset_common_infra(common_infra) self.inner_strategy.reset_common_infra(common_infra) def _init_sub_trading(self, trade_decision): - trade_step = self.trade_calendar.get_trade_step() - trade_start_time, 
trade_end_time = self.trade_calendar.get_step_time(trade_step) + trade_start_time, trade_end_time = self.trade_calendar.get_cur_step_time() self.inner_executor.reset(start_time=trade_start_time, end_time=trade_end_time) sub_level_infra = self.inner_executor.get_level_infra() + self.level_infra.set_sub_level_infra(sub_level_infra) self.inner_strategy.reset(level_infra=sub_level_infra, outer_trade_decision=trade_decision) - def execute(self, trade_decision): - return_value = {} - for _decision in self.collect_data(trade_decision, return_value): - pass - return return_value.get("execute_result") + def _update_trade_decision(self, trade_decision: BaseTradeDecision) -> BaseTradeDecision: + # outter strategy have chance to update decision each iterator + updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar) + if updated_trade_decision is not None: + trade_decision = updated_trade_decision + # NEW UPDATE + # create a hook for inner strategy to update outter decision + self.inner_strategy.alter_outer_trade_decision(trade_decision) + return trade_decision - def collect_data(self, trade_decision: BaseTradeDecision, return_value=None): - if self.track_data: - yield trade_decision + # def _get_inner_trade_decision(self, outer_trade_decision: BaseTradeDecision, inner_execute_result): + # # In some cases, the inner strategy can be skipped, but the inner executor should keep running + # if outer_trade_decision.empty() and self._skip_empty_decision: + # return EmptyTradeDecision(self.inner_strategy) + # return self.inner_strategy.generate_trade_decision(inner_execute_result) + # _inner_trade_decision = self._get_inner_trade_decision(trade_decision, _inner_execute_result) + + def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0): execute_result = [] inner_order_indicators = [] + decision_list = [] + # NOTE: + # - this is necessary to calculating the steps in sub level + # - more detailed information will be set into trade decision + 
self._init_sub_trading(trade_decision) - if not (trade_decision.empty() and self._skip_empty_decision): - _inner_execute_result = None - self._init_sub_trading(trade_decision) - while not self.inner_executor.finished(): - # outter strategy have chance to update decision each iterator - updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar) - if updated_trade_decision is not None: - trade_decision = updated_trade_decision - # NEW UPDATE - # create a hook for inner strategy to update outter decision - self.inner_strategy.alter_outer_trade_decision(trade_decision) + _inner_execute_result = None + while not self.inner_executor.finished(): + trade_decision = self._update_trade_decision(trade_decision) + + if trade_decision.empty() and self._skip_empty_decision: + # give one chance for outer stategy to update the strategy + # - For updating some information in the sub executor(the strategy have no knowledge of the inner + # executor when generating the decision) + break + + sub_cal: TradeCalendarManager = self.inner_executor.trade_calendar + start_idx, end_idx = get_start_end_idx(sub_cal, trade_decision) + if not self._align_range_limit or start_idx <= sub_cal.get_trade_step() <= end_idx: + # if force align the range limit, skip the steps outside the decision range limit _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result) + # NOTE sub_cal.get_cur_step_time() must be called before collect_data in case of step shifting + decision_list.append((_inner_trade_decision, *sub_cal.get_cur_step_time())) # NOTE: Trade Calendar will step forward in the follow line _inner_execute_result = yield from self.inner_executor.collect_data( - trade_decision=_inner_trade_decision + trade_decision=_inner_trade_decision, level=level + 1 ) - execute_result.extend(_inner_execute_result) + inner_order_indicators.append( self.inner_executor.trade_account.get_trade_indicator().get_order_indicator() ) + else: + # do nothing and just 
step forward + sub_cal.step() - trade_step = self.trade_calendar.get_trade_step() - trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) - self.trade_account.update_bar_end( - trade_start_time, - trade_end_time, - self.trade_exchange, - atomic=False, - outer_trade_decision=trade_decision, - inner_order_indicators=inner_order_indicators, - indicator_config=self.indicator_config, - ) - - self.trade_calendar.step() - if return_value is not None: - return_value.update({"execute_result": execute_result}) - return execute_result + return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list} def get_all_executors(self): """get all executors, including self and inner_executor.get_all_executors()""" @@ -337,17 +424,13 @@ class SimulatorExecutor(BaseExecutor): generate_report: bool = False, verbose: bool = False, track_data: bool = False, - trade_exchange: Exchange = None, common_infra: CommonInfrastructure = None, - trade_type: str = TT_PARAL, + trade_type: str = TT_SERIAL, **kwargs, ): """ Parameters ---------- - trade_exchange : Exchange - exchange that provides market info, used to deal order and generate report - - If `trade_exchange` is None, self.trade_exchange will be set with common_infra trade_type: str please refer to the doc of `TT_SERIAL` & `TT_PARAL` """ @@ -362,20 +445,9 @@ class SimulatorExecutor(BaseExecutor): common_infra=common_infra, **kwargs, ) - if trade_exchange is not None: - self.trade_exchange = trade_exchange self.trade_type = trade_type - def reset_common_infra(self, common_infra): - """ - reset infrastructure for trading - - reset trade_exchange - """ - super(SimulatorExecutor, self).reset_common_infra(common_infra) - if common_infra.has("trade_exchange"): - self.trade_exchange = common_infra.get("trade_exchange") - def _get_order_iterator(self, trade_decision: BaseTradeDecision) -> List[Order]: """ @@ -405,10 +477,9 @@ class SimulatorExecutor(BaseExecutor): raise 
NotImplementedError(f"This type of input is not supported") return order_it - def execute(self, trade_decision: BaseTradeDecision): + def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0): - trade_step = self.trade_calendar.get_trade_step() - trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step) + trade_start_time, _ = self.trade_calendar.get_cur_step_time() execute_result = [] for order in self._get_order_iterator(trade_decision): @@ -450,16 +521,4 @@ class SimulatorExecutor(BaseExecutor): print("[W {:%Y-%m-%d %H:%M:%S}]: {} wrong.".format(trade_start_time, order.stock_id)) # do nothing pass - - # Account will not be changed in this function - self.trade_account.update_bar_end( - trade_start_time, - trade_end_time, - self.trade_exchange, - atomic=True, - outer_trade_decision=trade_decision, - trade_info=execute_result, - indicator_config=self.indicator_config, - ) - self.trade_calendar.step() - return execute_result + return execute_result, {"trade_info": execute_result} diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py index 20c97aa90..1a88ded93 100644 --- a/qlib/backtest/order.py +++ b/qlib/backtest/order.py @@ -3,6 +3,7 @@ # TODO: rename it with decision.py from __future__ import annotations from enum import IntEnum +from qlib.log import get_module_logger # try to fix circular imports when enabling type hints from typing import TYPE_CHECKING @@ -179,7 +180,7 @@ class BaseTradeDecision: 2. 
Same as `case 1.3` """ - def __init__(self, strategy: BaseStrategy): + def __init__(self, strategy: BaseStrategy, idx_range: Tuple[int, int] = None): """ Parameters ---------- @@ -187,6 +188,8 @@ class BaseTradeDecision: The strategy who make the decision """ self.strategy = strategy + self.total_step = None # upper strategy has no knowledge about the sub executor before `_init_sub_trading` + self.idx_range = idx_range def get_decision(self) -> List[object]: """ @@ -207,7 +210,11 @@ class BaseTradeDecision: def update(self, trade_calendar: TradeCalendarManager) -> Union["BaseTradeDecision", None]: """ - Be called at the **start** of each step + Be called at the **start** of each step. + + This function is designed for the following purposes + 1) Leave a hook for the strategy who make `self` decision to update the decision itself + 2) Update some information from the inner executor calendar Parameters ---------- @@ -221,13 +228,27 @@ class BaseTradeDecision: BaseTradeDecision: New update, use new decision """ + # purpose 1) + self.total_step = trade_calendar.get_trade_len() + if self.idx_range is not None: + logger = get_module_logger("decision") + start_idx, end_idx = self.idx_range + if start_idx < 0 or end_idx >= self.total_step: + logger.warning(f"{self.idx_range} go beyound the total_step({self.total_step}), it will be clipped") + self.idx_range = max(0, start_idx), min(self.total_step - 1, end_idx) + + # purpose 2) return self.strategy.update_trade_decision(self, trade_calendar) - def get_range_limit(self) -> Tuple[int, int]: + def get_range_limit(self, **kwargs) -> Tuple[int, int]: """ return the expected step range for limiting the decision execution time Both left and right are **closed** + **kwargs: + {"default_value": } + # using dict is for distinguish no value provided or None provided + Returns ------- Tuple[int, int]: @@ -235,12 +256,32 @@ class BaseTradeDecision: Raises ------ NotImplementedError: - If the decision can't provide a unified start and end +
If the following criteria meet + 1) the decision can't provide a unified start and end + 2) default_value is None """ - raise NotImplementedError(f"Please implement the `func` method") + if self.idx_range is None: + if "default_value" in kwargs: + return kwargs["default_value"] + else: + # Default to get full index + raise NotImplementedError(f"The decision didn't provide an index range") + return self.idx_range def empty(self) -> bool: - return len(self.get_decision()) == 0 + for obj in self.get_decision(): + if isinstance(obj, Order): + # Zero amount order will be treated as empty + if not np.isclose(obj.amount, 0.0): + return False + else: + return True + return True + + +class EmptyTradeDecision(BaseTradeDecision): + def empty(self) -> bool: + return True class TradeDecisionWO(BaseTradeDecision): @@ -249,16 +290,9 @@ class TradeDecisionWO(BaseTradeDecision): Besides, the time_range is also included. """ - def __init__(self, order_list: List[Order], strategy: BaseStrategy, idx_range: Tuple = None): - super().__init__(strategy) + def __init__(self, order_list: List[Order], strategy: BaseStrategy, idx_range: Tuple[int, int] = None): + super().__init__(strategy, idx_range=idx_range) self.order_list = order_list - self.idx_range = idx_range - - def get_range_limit(self) -> Tuple[int, int]: - if self.idx_range is None: - # Default to get full index - raise NotImplementedError(f"The decision didn't provide an index range") - return self.idx_range def get_decision(self) -> List[object]: return self.order_list diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 43a6a455b..138a44faa 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -4,21 +4,23 @@ from collections import OrderedDict from logging import warning -from qlib.backtest.exchange import Exchange -from typing import Dict, List -from qlib.backtest.order import BaseTradeDecision, Order, OrderDir -import pandas as pd -import numpy as np import pathlib +from typing import Dict, 
List, Tuple import warnings -from pandas.core import groupby +import numpy as np +import pandas as pd +from pandas.core import groupby from pandas.core.frame import DataFrame -from ..utils.time import Freq -from ..utils.resam import resam_ts_data, get_higher_eq_freq_feature +from qlib.backtest.exchange import Exchange +from qlib.backtest.order import BaseTradeDecision, Order, OrderDir +from qlib.backtest.utils import TradeCalendarManager + from ..data import D from ..tests.config import CSI300_BENCH +from ..utils.resam import get_higher_eq_freq_feature, resam_ts_data +from ..utils.time import Freq class Report: @@ -251,14 +253,21 @@ class Indicator: """ def __init__(self): + # order indicator is metrics for a single order for a specific step self.order_indicator_his = OrderedDict() - self.order_indicator = OrderedDict() - self.trade_indicator_his = OrderedDict() - self.trade_indicator = OrderedDict() + self.order_indicator: Dict[str, pd.Series] = OrderedDict() - def clear(self): + # trade indicator is metrics for all orders for a specific step + self.trade_indicator_his = OrderedDict() + self.trade_indicator: Dict[str, float] = OrderedDict() + + self._trade_calendar = None + + # def reset(self, trade_calendar: TradeCalendarManager): + def reset(self): self.order_indicator = OrderedDict() self.trade_indicator = OrderedDict() + # self._trade_calendar = trade_calendar def record(self, trade_start_time): self.order_indicator_his[trade_start_time] = self.order_indicator @@ -294,9 +303,14 @@ class Indicator: def _update_order_price_advantage(self): # NOTE: # trade_price and baseline price will be same on the lowest-level - # So Pa should be 0 + # So Pa should be 0 or do nothing self.order_indicator["pa"] = 0 + def update_order_indicators(self, trade_info: list): + self._update_order_trade_info(trade_info=trade_info) + self._update_order_fulfill_rate() + self._update_order_price_advantage() + def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, 
pd.Series]]): inner_amount = pd.Series() deal_amount = pd.Series() @@ -312,7 +326,7 @@ class Indicator: ) trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0) trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0) - trade_dir = trade_dir.add(_order_indicator["trade_dir"]) + trade_dir = trade_dir.add(_order_indicator["trade_dir"], fill_value=0) trade_dir = trade_dir.apply(Order.parse_dir) @@ -335,24 +349,77 @@ class Indicator: def _agg_order_fulfill_rate(self): self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"] - def _agg_order_price_advantage( + def _get_base_vol_pri( self, - inner_order_indicators: List[Dict[str, pd.Series]], + inst: str, trade_start_time: pd.Timestamp, trade_end_time: pd.Timestamp, + direction: OrderDir, + decision: BaseTradeDecision, + trade_exchange: Exchange, + pa_config: dict = {}, + ): + """Get the base volume and price information""" + + agg = pa_config.get("agg", "twap").lower() + price = pa_config.get("price", "deal_price").lower() + + if price == "deal_price": + price_s = trade_exchange.get_deal_price( + inst, trade_start_time, trade_end_time, direction=direction, method=None + ) + else: + raise NotImplementedError(f"This type of input is not supported") + + # NOTE: there are some zeros in the trading price. These cases are known meaningless + # for aligning the previous logic, remove it. 
+ # price_s = price_s.mask(np.isclose(price_s, 0)) + + if agg == "vwap": + volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None) + elif agg == "twap": + volume_s = pd.Series(1, index=price_s.index) + else: + raise NotImplementedError(f"This type of input is not supported") + + # no sub executor on the lowest level + # So range_limit an total step will all be None + total_step = decision.total_step + if total_step is None: + total_step = 1 + range_limit = decision.get_range_limit(default_value=(0, total_step - 1)) + + assert volume_s.shape[0] % total_step == 0, "The price series can't be divided by step length" + factor = volume_s.shape[0] // total_step + + slc = slice(range_limit[0] * factor, (range_limit[1] + 1) * factor) + + volume_s = volume_s.iloc[slc] + price_s = price_s.iloc[slc] + + base_volume = volume_s.sum().item() + base_price = ((price_s * volume_s).sum() / base_volume).item() + + return base_price, base_volume + + def _agg_base_price( + self, + inner_order_indicators: List[Dict[str, pd.Series]], + decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]], trade_exchange: Exchange, pa_config: dict = {}, ): """ + # NOTE:!!!! + # Strong assumption!!!!!! + # the correctness of the base_price relies on that the **same** exchange is used Parameters ---------- inner_order_indicators : List[Dict[str, pd.Series]] the indicators of account of inner executor - trade_start_time : pd.Timestamp - the start_time of the trade period, for slicing - trade_end_time : pd.Timestamp - the end_time of the trade period, for slicing (so it may include more time at the end) + decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]], + a list of decisions according to inner_order_indicators trade_exchange : Exchange for retrieving trading price pa_config : dict @@ -362,32 +429,61 @@ class Indicator: "price": "$close", # TODO: this is not supported now!!!!! 
# default to use deal price of the exchange } + """ - agg = pa_config.get("agg", "twap").lower() - price = pa_config.get("price", "deal_price").lower() + # TODO: I think there are potentials to be optimized + trade_dir = self.order_indicator["trade_dir"] + if len(trade_dir) > 0: + bp_all, bv_all = [], [] + # + for oi, (dec, start, end) in zip(inner_order_indicators, decision_list): + bp_s = oi.get("base_price", pd.Series()).reindex(trade_dir.index) + bv_s = oi.get("base_volume", pd.Series()).reindex(trade_dir.index) + bp_new, bv_new = {}, {} + for pr, v, (inst, direction) in zip(bp_s.values, bv_s.values, trade_dir.items()): + if np.isnan(pr): + bp_new[inst], bv_new[inst] = self._get_base_vol_pri( + inst, + start, + end, + decision=dec, + direction=direction, + trade_exchange=trade_exchange, + pa_config=pa_config, + ) + else: + bp_new[inst], bv_new[inst] = pr, v - base_price = {} - for inst, dir in self.order_indicator["trade_dir"].items(): + bp_new, bv_new = pd.Series(bp_new), pd.Series(bv_new) + bp_all.append(bp_new) + bv_all.append(bv_new) + bp_all = pd.concat(bp_all, axis=1) + bv_all = pd.concat(bv_all, axis=1) - if price == "deal_price": - price_s = trade_exchange.get_deal_price(inst, trade_start_time, trade_end_time, dir, method=None) - else: - raise NotImplementedError(f"This type of input is not supported") + self.order_indicator["base_volume"] = bv_all.sum(axis=1) + self.order_indicator["base_price"] = (bp_all * bv_all).sum(axis=1) / self.order_indicator["base_volume"] - # there are some zeros in the trading price. 
These cases are known meaningless - price_s = price_s.mask(np.isclose(price_s, 0)) + def _agg_order_price_advantage(self): + if not self.order_indicator["trade_price"].empty: + self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1 + else: + self.order_indicator["pa"] = pd.Series() - if agg == "vwap": - volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None) - base_price[inst] = ((price_s * volume_s).sum() / volume_s.sum()).item() - elif agg == "twap": - base_price[inst] = price_s.mean().item() - - base_price = pd.Series(base_price) - - # update PA - self.order_indicator["pa"] = self.order_indicator["trade_price"] / base_price - 1 + def agg_order_indicators( + self, + inner_order_indicators: List[Dict[str, pd.Series]], + decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]], + outer_trade_decision: BaseTradeDecision, + trade_exchange: Exchange, + indicator_config={}, + ): + self._agg_order_trade_info(inner_order_indicators) + self._update_trade_amount(outer_trade_decision) + self._agg_order_fulfill_rate() + pa_config = indicator_config.get("pa_config", {}) + self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config) + self._agg_order_price_advantage() def _cal_trade_fulfill_rate(self, method="mean"): if method == "mean": @@ -402,7 +498,7 @@ class Indicator: raise ValueError(f"method {method} is not supported!") def _cal_trade_price_advantage(self, method="mean"): - pa_order = self.order_indicator["pa"] * (2 * (self.order_indicator["amount"] < 0).astype(int) - 1) + pa_order = self.order_indicator["pa"] * (1 - self.order_indicator["trade_dir"] * 2) if method == "mean": return pa_order.mean() elif method == "amount_weighted": @@ -427,28 +523,6 @@ class Indicator: def _cal_trade_order_count(self): return self.order_indicator["amount"].count() - def update_order_indicators(self, trade_info: list): - 
self._update_order_trade_info(trade_info=trade_info) - self._update_order_fulfill_rate() - self._update_order_price_advantage() - - def agg_order_indicators( - self, - trade_start_time, - trade_end_time, - inner_order_indicators: List[Dict[str, pd.Series]], - outer_trade_decision: BaseTradeDecision, - trade_exchange: Exchange, - indicator_config={}, - ): - self._agg_order_trade_info(inner_order_indicators) - self._update_trade_amount(outer_trade_decision) - self._agg_order_fulfill_rate() - pa_config = indicator_config.get("pa_config", {}) - self._agg_order_price_advantage( - inner_order_indicators, trade_start_time, trade_end_time, trade_exchange, pa_config=pa_config - ) - def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}): show_indicator = indicator_config.get("show_indicator", False) ffr_config = indicator_config.get("ffr_config", {}) diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py index 0ba607bdb..5c643df30 100644 --- a/qlib/backtest/utils.py +++ b/qlib/backtest/utils.py @@ -1,9 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from __future__ import annotations +from typing import Union, TYPE_CHECKING, Tuple, Union, List, Set + +if TYPE_CHECKING: + from qlib.backtest.order import BaseTradeDecision + from qlib.strategy.base import BaseStrategy import pandas as pd import warnings -from typing import Tuple, Union, List, Set from ..utils.resam import get_resam_calendar from ..data.data import Cal @@ -30,17 +35,20 @@ class TradeCalendarManager: closed end of the trade time range, by default None If `end_time` is None, it must be reset before trading. 
""" - self.freq = freq - self.start_time = pd.Timestamp(start_time) if start_time else None - self.end_time = pd.Timestamp(end_time) if end_time else None - self._init_trade_calendar(freq=freq, start_time=start_time, end_time=end_time) + self.reset(freq=freq, start_time=start_time, end_time=end_time) - def _init_trade_calendar(self, freq, start_time, end_time): + def reset(self, freq, start_time, end_time): """ + Please refer to the docs of `__init__` + Reset the trade calendar - self.trade_len : The total count for trading step - self.trade_step : The number of trading step finished, self.trade_step can be [0, 1, 2, ..., self.trade_len - 1] """ + self.freq = freq + self.start_time = pd.Timestamp(start_time) if start_time else None + self.end_time = pd.Timestamp(end_time) if end_time else None + _calendar, freq, freq_sam = get_resam_calendar(freq=freq) self._calendar = _calendar _, _, _start_index, _end_index = Cal.locate_index(start_time, end_time, freq=freq, freq_sam=freq_sam) @@ -67,6 +75,7 @@ class TradeCalendarManager: return self.freq def get_trade_len(self): + """get the total step length""" return self.trade_len def get_trade_step(self): @@ -99,6 +108,12 @@ class TradeCalendarManager: calendar_index = self.start_index + trade_step return self._calendar[calendar_index], self._calendar[calendar_index + 1] - pd.Timedelta(seconds=1) + def get_cur_step_time(self): + """ + get current step time + """ + return self.get_step_time(self.get_trade_step()) + def get_all_time(self): """Get the start_time and end_time for trading""" return self.start_time, self.end_time @@ -146,5 +161,40 @@ class CommonInfrastructure(BaseInfrastructure): class LevelInfrastructure(BaseInfrastructure): + """level instrastructure is created by executor, and then shared to strategies on the same level""" + def get_support_infra(self): - return ["trade_calendar"] + return ["trade_calendar", "sub_level_infra"] + + def reset_cal(self, freq, start_time, end_time): + """reset trade calendar 
manager""" + if self.has("trade_calendar"): + self.get("trade_calendar").reset(freq, start_time=start_time, end_time=end_time) + else: + self.reset_infra(trade_calendar=TradeCalendarManager(freq, start_time=start_time, end_time=end_time)) + + def set_sub_level_infra(self, sub_level_infra: LevelInfrastructure): + """this will make the calendar access easier when crossing multiple levels""" + self.reset_infra(sub_level_infra=sub_level_infra) + + +def get_start_end_idx(trade_calendar: TradeCalendarManager, outer_trade_decision: BaseTradeDecision) -> Union[int, int]: + """ + A helper function for getting the decision-level index range limitation for inner strategy + - NOTE: this function is not applicable to order-level + + Parameters + ---------- + trade_calendar : TradeCalendarManager + outer_trade_decision : BaseTradeDecision + the trade decision made by outer strategy + + Returns + ------- + Union[int, int]: + start index and end index + """ + try: + return outer_trade_decision.get_range_limit() + except NotImplementedError: + return 0, trade_calendar.get_trade_len() - 1 diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 3ca325bf6..026afc8bb 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -14,29 +14,7 @@ from ...backtest.order import BaseTradeDecision, Order, TradeDecisionWO from ...backtest.exchange import Exchange, OrderHelper from ...backtest.utils import CommonInfrastructure, LevelInfrastructure from qlib.utils.file import get_io_object - - -def get_start_end_idx(strategy: BaseStrategy, outer_trade_decision: BaseTradeDecision) -> Union[int, int]: - """ - A helper function for getting the decision-level index range limitation for inner strategy - - NOTE: this function is not applicable to order-level - - Parameters - ---------- - strategy : BaseStrategy - the inner strawtegy - outer_trade_decision : BaseTradeDecision - the trade decision made by outer strategy - -
Returns - ------- - Union[int, int]: - start index and end index - """ - try: - return outer_trade_decision.get_range_limit() - except NotImplementedError: - return 0, strategy.trade_calendar.get_trade_len() - 1 +from qlib.backtest.utils import get_start_end_idx class TWAPStrategy(BaseStrategy): @@ -105,7 +83,7 @@ class TWAPStrategy(BaseStrategy): # get the number of trading step finished, trade_step can be [0, 1, 2, ..., trade_len - 1] trade_step = self.trade_calendar.get_trade_step() # get the total count of trading step - start_idx, end_idx = get_start_end_idx(self, self.outer_trade_decision) + start_idx, end_idx = get_start_end_idx(self.trade_calendar, self.outer_trade_decision) trade_len = end_idx - start_idx + 1 if trade_step < start_idx or trade_step > end_idx: diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index a787c098f..23d6b520a 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
+from qlib.backtest.position import BasePosition from typing import List, Union from ..model.base import BaseModel @@ -37,24 +38,26 @@ class BaseStrategy: self.reset(level_infra=level_infra, common_infra=common_infra, outer_trade_decision=outer_trade_decision) + @property + def trade_calendar(self) -> TradeCalendarManager: + return self.level_infra.get("trade_calendar") + + @property + def trade_position(self) -> BasePosition: + return self.common_infra.get("trade_account").current + def reset_level_infra(self, level_infra: LevelInfrastructure): if not hasattr(self, "level_infra"): self.level_infra = level_infra else: self.level_infra.update(level_infra) - if level_infra.has("trade_calendar"): - self.trade_calendar: TradeCalendarManager = level_infra.get("trade_calendar") - def reset_common_infra(self, common_infra: CommonInfrastructure): if not hasattr(self, "common_infra"): self.common_infra: CommonInfrastructure = common_infra else: self.common_infra.update(common_infra) - if common_infra.has("trade_account"): - self.trade_position = common_infra.get("trade_account").current - def reset( self, level_infra: LevelInfrastructure = None, From 155019ba353bcd7d6758dd23914698f2c34395d8 Mon Sep 17 00:00:00 2001 From: Young Date: Fri, 9 Jul 2021 10:33:41 +0000 Subject: [PATCH 24/28] move the pa sign from last step to first --- qlib/backtest/report.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 138a44faa..8a49af490 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -466,7 +466,10 @@ class Indicator: def _agg_order_price_advantage(self): if not self.order_indicator["trade_price"].empty: - self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1 + sign = 1 - self.order_indicator["trade_dir"] * 2 + self.order_indicator["pa"] = sign * ( + self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1 
+ ) else: self.order_indicator["pa"] = pd.Series() @@ -498,7 +501,11 @@ class Indicator: raise ValueError(f"method {method} is not supported!") def _cal_trade_price_advantage(self, method="mean"): - pa_order = self.order_indicator["pa"] * (1 - self.order_indicator["trade_dir"] * 2) + pa_order = self.order_indicator["pa"] + if isinstance(pa_order, (int, float)): + # pa from atomic executor + return pa_order + if method == "mean": return pa_order.mean() elif method == "amount_weighted": @@ -511,7 +518,10 @@ class Indicator: raise ValueError(f"method {method} is not supported!") def _cal_trade_positive_rate(self): - pa_order = self.order_indicator["pa"] * (2 * (self.order_indicator["amount"] < 0).astype(int) - 1) + pa_order = self.order_indicator["pa"] + if isinstance(pa_order, (int, float)): + # pa from atomic executor + return pa_order return (pa_order > 0).astype(int).sum() / pa_order.count() def _cal_trade_amount(self): From c29e5b262191557a3a3d08ef68a8a80a3a28973b Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Mon, 12 Jul 2021 13:50:13 +0000 Subject: [PATCH 25/28] Fix circular import --- qlib/strategy/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qlib/strategy/__init__.py b/qlib/strategy/__init__.py index e3fcd8e26..59e481eb9 100644 --- a/qlib/strategy/__init__.py +++ b/qlib/strategy/__init__.py @@ -1,4 +1,2 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. - -from .base import * From 9bf8c999e67520a45ad4bf1b0351ec311debb2ec Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 20 Jul 2021 06:14:40 +0000 Subject: [PATCH 26/28] type checking update --- qlib/strategy/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index fa21fae5f..7a267b511 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -1,6 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from qlib.backtest.exchange import Exchange +from __future__ import annotations +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition from typing import List, Tuple, Union From bdebe12cf29ad7b7cad3b261e6a603a579f2d458 Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 26 Jul 2021 06:14:57 +0000 Subject: [PATCH 27/28] support empty benchmark Empty benchmark could accelerate the learning process --- qlib/backtest/__init__.py | 3 ++- qlib/backtest/account.py | 21 +++++++++-------- qlib/backtest/exchange.py | 20 ++++++++-------- qlib/backtest/report.py | 5 ++-- qlib/contrib/strategy/rule_strategy.py | 32 ++++++++++++++------------ qlib/strategy/base.py | 3 ++- 6 files changed, 45 insertions(+), 39 deletions(-) diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index 19dbe87ce..dbfbd4a0e 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -8,9 +8,9 @@ from .account import Account if TYPE_CHECKING: from ..strategy.base import BaseStrategy + from .executor import BaseExecutor from .position import Position from .exchange import Exchange -from .executor import BaseExecutor from .backtest import backtest_loop from .backtest import collect_data_loop from .order import Order @@ -155,6 +155,7 @@ def get_strategy_executor( # - for avoiding recursive import # - typing annotations is not reliable from ..strategy.base import BaseStrategy + from .executor import BaseExecutor trade_account = create_account_instance( start_time=start_time, end_time=end_time, benchmark=benchmark, account=account, pos_type=pos_type diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py index 806f88a96..9b9a25c23 100644 --- a/qlib/backtest/account.py +++ b/qlib/backtest/account.py @@ -75,17 +75,7 @@ class Account: ): self._pos_type = pos_type self._port_metr_enabled = port_metr_enabled - self.init_vars(init_cash, position_dict, freq, benchmark_config) - def 
is_port_metr_enabled(self): - """ - Is portfolio-based metrics enabled. - """ - return self._port_metr_enabled and not self.current.skip_update() - - def init_vars(self, init_cash, position_dict, freq: str, benchmark_config: dict): - - # init cash self.init_cash = init_cash self.current: BasePosition = init_instance_by_config( { @@ -100,8 +90,19 @@ class Account: self.accum_info = AccumulatedInfo() self.report = None self.positions = {} + + # in of reset ignore None values + self.benchmark_config = benchmark_config + self.freq = freq + self.reset(freq=freq, benchmark_config=benchmark_config, init_report=True) + def is_port_metr_enabled(self): + """ + Is portfolio-based metrics enabled. + """ + return self._port_metr_enabled and not self.current.skip_update() + def reset_report(self, freq, benchmark_config): # portfolio related metrics if self.is_port_metr_enabled(): diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py index a22754885..ea1d012eb 100644 --- a/qlib/backtest/exchange.py +++ b/qlib/backtest/exchange.py @@ -512,7 +512,7 @@ class Exchange: def _get_factor_or_raise_erorr(self, factor: float = None, stock_id: str = None, start_time=None, end_time=None): """Please refer to the docs of get_amount_of_trade_unit""" if factor is None: - if stock_id is not None and start_time is not None and end_time is not None : + if stock_id is not None and start_time is not None and end_time is not None: factor = self.get_factor(stock_id=stock_id, start_time=start_time, end_time=end_time) else: raise ValueError(f"`factor` and (`stock_id`, `start_time`, `end_time`) can't both be None") @@ -537,15 +537,16 @@ class Exchange: the end time of trading range """ if not self.trade_w_adj_price and self.trade_unit is not None: - factor = self._get_factor_or_raise_erorr(factor=factor, - stock_id=stock_id, - start_time=start_time, - end_time=end_time) + factor = self._get_factor_or_raise_erorr( + factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time + ) 
return self.trade_unit / factor else: return None - def round_amount_by_trade_unit(self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None): + def round_amount_by_trade_unit( + self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None + ): """Parameter Please refer to the docs of get_amount_of_trade_unit @@ -555,10 +556,9 @@ class Exchange: """ if not self.trade_w_adj_price and self.trade_unit is not None: # the minimal amount is 1. Add 0.1 for solving precision problem. - factor = self._get_factor_or_raise_erorr(factor=factor, - stock_id=stock_id, - start_time=start_time, - end_time=end_time) + factor = self._get_factor_or_raise_erorr( + factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time + ) return (deal_amount * factor + 0.1) // self.trade_unit * self.trade_unit / factor return deal_amount diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index 6b64bf3b1..84cae2568 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -80,11 +80,12 @@ class Report: def init_bench(self, freq=None, benchmark_config=None): if freq is not None: self.freq = freq - if benchmark_config is not None: - self.benchmark_config = benchmark_config + self.benchmark_config = benchmark_config self.bench = self._cal_benchmark(self.benchmark_config, self.freq) def _cal_benchmark(self, benchmark_config, freq): + if benchmark_config is None: + return None benchmark = benchmark_config.get("benchmark", CSI300_BENCH) if benchmark is None: return None diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 1ec054e45..b42c4f578 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -63,9 +63,9 @@ class TWAPStrategy(BaseStrategy): stock_id=order.stock_id, start_time=trade_start_time, end_time=trade_end_time ): continue - _amount_trade_unit = 
self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) _order_amount = None # considering trade unit if _amount_trade_unit is None: @@ -169,9 +169,9 @@ class SBBStrategyBase(BaseStrategy): self.trade_trend[order.stock_id] = _pred_trend continue # get amount of one trade unit - _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) if _pred_trend == self.TREND_MID: _order_amount = None # considering trade unit @@ -471,9 +471,9 @@ class ACStrategy(BaseStrategy): if sig_sam is None or np.isnan(sig_sam): # no signal, TWAP - _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit( + stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time + ) if _amount_trade_unit is None: # divide the order into equal parts, and trade one part _order_amount = self.trade_amount[order.stock_id] / (trade_len - trade_step) @@ -494,10 +494,9 @@ class ACStrategy(BaseStrategy): np.sinh(kappa * (trade_len - trade_step)) - np.sinh(kappa * (trade_len - trade_step - 1)) ) / np.sinh(kappa * trade_len) _order_amount = order.amount * amount_ratio - _order_amount = self.trade_exchange.round_amount_by_trade_unit(_order_amount, - stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time) + _order_amount = self.trade_exchange.round_amount_by_trade_unit( + _order_amount, stock_id=order.stock_id, start_time=order.start_time, 
end_time=order.end_time + ) if order.direction == order.SELL: # sell all amount at last @@ -584,8 +583,11 @@ class FileOrderStrategy(BaseStrategy): """ def __init__( - self, file: Union[IO, str, Path, pd.DataFrame], - trade_range: Union[Tuple[int, int], TradeRange] = None, *args, **kwargs + self, + file: Union[IO, str, Path, pd.DataFrame], + trade_range: Union[Tuple[int, int], TradeRange] = None, + *args, + **kwargs, ): """ diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 7a267b511..c47d2494f 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -2,9 +2,10 @@ # Licensed under the MIT License. from __future__ import annotations from typing import TYPE_CHECKING + if TYPE_CHECKING: from qlib.backtest.exchange import Exchange -from qlib.backtest.position import BasePosition + from qlib.backtest.position import BasePosition from typing import List, Tuple, Union from ..model.base import BaseModel From e817413769c648a7cd6e9a902f9de568b3c08a5c Mon Sep 17 00:00:00 2001 From: v-mingzhehan Date: Tue, 27 Jul 2021 14:52:29 +0000 Subject: [PATCH 28/28] Restore examples --- .../nested_decision_execution/assets/orders | Bin 3464 -> 0 bytes .../requirements.txt | 2 - .../nested_decision_execution/rl_dummy.py | 586 ------------------ .../nested_decision_execution/workflow.py | 11 +- 4 files changed, 2 insertions(+), 597 deletions(-) delete mode 100644 examples/nested_decision_execution/assets/orders delete mode 100644 examples/nested_decision_execution/requirements.txt delete mode 100644 examples/nested_decision_execution/rl_dummy.py diff --git a/examples/nested_decision_execution/assets/orders b/examples/nested_decision_execution/assets/orders deleted file mode 100644 index 7902b901c000bfd82fb7fcc0386c588f3f78cbb4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3464 zcmai$eM}Q~7{^;->?j+k)cJzoL`)bn&F}ioCT}CMX-(Nc=HkTe`a-UQzU+G4Bus`Q zI>BQSJ%TO}L`B0UZjDnjmStp_EHR5j&`k+(i2|Dw#>|9GmWBPU9rq*4-LLmYFYP_~ 
z-1m8&@9%k97u&MuNk#Z7=QFwFx2oKBjh%8-vaSHD@i9&p!*h=nhwn%DXZG@YU=$Hx zeU3_-+sSi8=}SfcOhCtTag@gw^s+p+%p3Iht2GWE zVH$dr2E9}wiP)^8GkM_zf_6G4Qc*e%7IFswTD`%@(*z_fDI|2)$?me_$pzt9dbveG zub@-uH2P_Jtwl+v!=Uwrv709+a%EvAg9!UZb-%Y|UHDQ}%7Dpe7Gb78?v?ND zOvJ;c2G2?=6Z~Qzvqjjb>Wh7UcMV5+Dwk$~(<%vrH3Pt?6Je)8NAJDdB)qbph&f3Z zR7L`Tl>J^@(iBX}!o$vOJcNOvZZ?uRz2~V7cv$N`ZODF0d10vz53|RWOTvWXWLR*}S6m-_MO`qBw?_Vz zOvwfaHPdTR&GPqMiNE!58D3eX@~EUTAx)hQS%Yc(Zt9o#=kT!BHPTZ$G zyve?R77v@K@XIPQk;rE=LqiGwG)HMbd#Oea21ql4)sN1mzA(KGudM4wP7)?GK&vN3 zpB-%P?w@Rk#lub{zbe^Hp=M?if;GDQ>ANoK@vzksJ+jK+zXnm+{qJwi7HzqS_u2N2 z?U8{0?M*?M&Wx<__>JOEE~Uiam2G|PUCA_e=-?4BqrI6j=WQ+x#p7X*KD#TKrl5?> zMmylzdy8|`Em7uK_o75;Wx{bHkuvqQnz8(kpTaA9N7X5>%z(zWZ+emj5i%p_1m(_;NTX0w)?lfma&mM zJgkmgFAFm**a5@A(LP&wl!fx_;d;4l0>gssba_1WTw7=aU$fym6_POMGq?lL)OfK! zcs@f#;?qp&|4=qfok=U!?D*>1*`lPCcvwS^^mYpGO%nNxFPkXaoCNT&+GB*IvhX|u zNLk6u!L7u)O?cSd99D7(3pInIR!q~dZQOS8Oq80b_n(qe28RZM{b0Er^8OlS1gRdL Jo`<=0.4.1 -torch>=1.8.0 diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py deleted file mode 100644 index c42e28be4..000000000 --- a/examples/nested_decision_execution/rl_dummy.py +++ /dev/null @@ -1,586 +0,0 @@ -import pickle -from collections import OrderedDict, defaultdict -from dataclasses import dataclass, asdict -from pprint import pprint -from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List - -import fire -import gym -import numpy as np -import pandas as pd -import qlib -from gym import spaces -from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager, backtest_func -from qlib.backtest.executor import NestedExecutor, SimulatorExecutor -from qlib.config import REG_CN -from qlib.data import D -from qlib.rl.interpreter import StateInterpreter, ActionInterpreter -from qlib.strategy import BaseStrategy -from qlib.tests.data import GetData -from qlib.utils import init_instance_by_config, exists_qlib_data -from 
torch.utils.data import Dataset, DataLoader -from tianshou.data import Batch, Collector -from tianshou.env import DummyVectorEnv, SubprocVectorEnv -from tianshou.policy import BasePolicy - -from workflow import NestedDecisonExecutionWorkflow - - -MAX_STEPS = 10 - - -def get_executor(start_time, end_time, executor, exchange, benchmark="SH000300", account=1e9) -> BaseExecutor: - trade_account = Account( - init_cash=account, - benchmark_config={ - "benchmark": benchmark, - "start_time": start_time, - "end_time": end_time, - }, - ) - - common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) - trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra) - - return trade_executor - - -def price_advantage(exec_price: float, baseline_price: float, direction: int) -> float: - if baseline_price == 0: - return 0. - if direction == 1: - return (1 - exec_price / baseline_price) * 10000 - else: - return (exec_price / baseline_price - 1) * 10000 - - -@dataclass -class EpisodicState: - """ - A simplified data structure as the input of RL-related components to calculate observations and rewards. - Some of the metrics info are calculated on-the-fly in this class. 
- """ - # requirements - stock_id: int - start_time: pd.Timestamp - end_time: pd.Timestamp - direction: int - target: float - num_step: int - - # simplified market data used to calculate backtest metrics - # this may contains information from future so be careful - market_price: np.ndarray - market_vol: np.ndarray - - # agent state - cur_time: Optional[pd.Timestamp] = None - cur_step: int = 0 - cur_tick: int = 0 # tick is the most fine-grained time unit (typically minute) - done: bool = False - position: Optional[float] = None - exec_vol: Optional[np.ndarray] = None - last_step_duration: Optional[int] = None - position_history: Optional[np.ndarray] = None - - # calculated statistics - turnover: Optional[float] = None - baseline_twap: Optional[float] = None - baseline_vwap: Optional[float] = None - exec_avg_price: Optional[float] = None - pa_twap: Optional[float] = None - pa_vwap: Optional[float] = None - fulfill_rate: Optional[float] = None - - def __post_init__(self): - assert self.target >= 0 - assert len(self.market_price) == len(self.market_vol) - self.cur_time = self.start_time - self.position = self.target - self.position_history = np.full((self.num_step + 1), np.nan) - self.position_history[0] = self.position - self.baseline_twap = np.mean(self.market_price) - if self.market_vol.sum() == 0: - self.baseline_vwap = np.mean(self.market_price) - else: - self.baseline_vwap = np.average(self.market_price, weights=self.market_vol) - - def update_stats(self): - market_price = self.market_price[:len(self.exec_vol)] - self.turnover = (self.exec_vol * market_price).sum() - # exec_vol can be zero - if np.isclose(self.exec_vol.sum(), 0): - self.exec_avg_price = market_price[0] - else: - self.exec_avg_price = np.average(market_price, weights=self.exec_vol) - self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.direction) - self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.direction) - self.fulfill_rate = (self.target - 
self.position) / self.target - if abs(self.fulfill_rate - 1.0) < 1e-5: - self.fulfill_rate = 1.0 - self.fulfill_rate *= 100 - - def logs(self): - logs = { - 'stop_time': self.cur_time - self.start_time, - 'stop_step': self.cur_step, - 'turnover': self.turnover, - 'baseline_twap': self.baseline_twap, - 'baseline_vwap': self.baseline_vwap, - 'exec_avg_price': self.exec_avg_price, - 'pa_twap': self.pa_twap, - 'pa_vwap': self.pa_vwap, - 'ffr': self.fulfill_rate - } - return logs - - @classmethod - def from_order_and_executor(cls, order: Order, calendar: TradeCalendarManager, frequency: str) -> "EpisodicState": - # Synchronous state for executor to EpisodicState - state = cls( - stock_id=order.stock_id, - start_time=order.start_time, - end_time=order.end_time, - direction=order.direction, - target=order.amount, - num_step=calendar.get_trade_len(), - market_price=_retrieve_backtest_data(order, '$close', frequency), - market_vol=_retrieve_backtest_data(order, '$volume', frequency), - ) - state.cur_step = calendar.get_trade_step() - assert state.cur_step == 0 - state.cur_time, _ = calendar.get_step_time(state.cur_step) - return state - - def update(self, execute_result: List[Order], calendar: TradeCalendarManager, - done: Optional[bool] = None, length: Optional[int] = None) -> "StepState": - if length is not None: - exec_vol = np.zeros(length) - exec_vol[:len(execute_result)] = np.array([order.deal_amount for order, _, __, ___ in execute_result]) - else: - exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result]) - # Synchronous exec_vol to executor and synchronous back to EpisodicState - cur_tick = self.cur_tick - ticks_this_step = len(exec_vol) - self.cur_step = trade_step = calendar.get_trade_step() - self.cur_tick += ticks_this_step - self.position -= np.sum(exec_vol) - self.position_history[trade_step] = self.position - if done is not None: - self.done = done - else: - self.done = self.position < 1e-5 - self.exec_vol = exec_vol if self.exec_vol 
is None else \ - np.concatenate((self.exec_vol, exec_vol)) - - if self.done: - self.update_stats() - else: - self.cur_time, _ = calendar.get_step_time(trade_step) - - l, r = cur_tick, cur_tick + ticks_this_step - assert 0 <= l < r - return StepState(exec_vol, self.market_vol[l:r], self.market_price[l:r], self) - - -@dataclass -class StepState: - # market info and execution volume for current step - exec_vol: np.ndarray - market_vol: np.ndarray - market_price: np.ndarray - - # episode info - episode_state: EpisodicState - - # calculated statistics - turnover: Optional[float] = None - exec_avg_price: Optional[float] = None - pa_twap: Optional[float] = None - pa_vwap: Optional[float] = None - - def __post_init__(self): - assert len(self.exec_vol) == len(self.market_price) == len(self.market_vol) - self.turnover = (self.exec_vol * self.market_price).sum() - if np.isclose(self.market_vol.sum(), 0): - self.exec_avg_price = self.market_price[0] - else: - self.exec_avg_price = np.average(self.market_price, weights=self.market_vol) - self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap, - self.episode_state.direction) - self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap, - self.episode_state.direction) - - -def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndarray: - # Retrieve backtest data for RL-specific use (including reward calculation) - return D.features( - [order.stock_id], - ['$open', '$close', '$high', '$low', '$volume'], - start_time=order.start_time, - end_time=order.end_time, - freq=frequency - )[field].to_numpy() - - -def create_sub_order(exec_vol: float, calendar: TradeCalendarManager, original_order: Order) -> Order: - # Convert a real number to an order - trade_step = calendar.get_trade_step() - trade_start_time, trade_end_time = calendar.get_step_time(trade_step) - order_kwargs = asdict(original_order) - order_kwargs.update(start_time=trade_start_time, 
end_time=trade_end_time, amount=exec_vol) - trade_decision = Order(**order_kwargs) - return trade_decision - - -class SingleOrderEnv(gym.Env): - def __init__(self, - observation: StateInterpreter, - action: ActionInterpreter, - reward: Any, - dataloader: Iterable, - executor: BaseExecutor): - self.action = action - self.observation = observation - self.reward = reward - self.dataloader = dataloader - self.executor = executor - - self.inner_frequency = self.executor.get_all_executor()[-1].time_per_step - - @property - def action_space(self): - return self.action.action_space - - @property - def observation_space(self): - return self.observation.observation_space - - def reset(self): - try: - self.cur_order = next(self.dataloader) - except StopIteration: - self.dataloader = None - return None - - self.execute_result = [] - self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) - self.ep_state = EpisodicState.from_order_and_executor( - self.cur_order, self.executor.trade_calendar, self.inner_frequency - ) - - self.action_history = np.full(self.ep_state.num_step, np.nan) - return self.observation(self.ep_state) - - def step(self, action): - assert self.dataloader is not None - assert not self.executor.finished() - self.action_history[self.ep_state.cur_step] = action - - exec_vol = self.action(action, self.ep_state) - trade_decision = create_sub_order(exec_vol, self.executor.trade_calendar, self.cur_order) - execute_result = self.executor.execute([trade_decision]) - step_state = self.ep_state.update(execute_result, self.executor.trade_calendar) - if self.executor.finished(): - assert self.ep_state.done - - reward, rew_info = self.reward(self.ep_state, step_state) - - info = { - 'action_history': self.action_history, - 'category': self.ep_state.direction, - 'reward': rew_info - } - if self.ep_state.done: - info['logs'] = self.ep_state.logs() - info['index'] = { - 'ins': self.ep_state.stock_id, - 'date': self.ep_state.start_time, - } - 
# TODO: collect logs - pprint(info) - - return self.observation(self.ep_state), reward, self.ep_state.done, info - - -class RLStrategy(BaseStrategy): - """When inference and do the backtest from end to end, use this strategy.""" - - def __init__( - self, - observation: "Observation", - action: "Action", - policy: BasePolicy, - **kwargs - ): - super().__init__(**kwargs) - self.observation = observation - self.action = action - self.policy = policy - - # TODO: how to get inner frequency and trade len - # This should be no longer required when PA is provided by qlib. - self.inner_frequency = "day" - self.inner_trade_len = 1 - - def reset(self, outer_trade_decision: List[Order] = None, **kwargs): - super().reset(outer_trade_decision=outer_trade_decision, **kwargs) - if outer_trade_decision is not None: - self.states = OrderedDict() # explicitly make it ordered - for order in outer_trade_decision: - state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day") - self.states[order.stock_id, order.direction] = state - - def generate_trade_decision(self, execute_result=None): - # apply results from the last step - if execute_result is not None: - orders = defaultdict(list) - for e in execute_result: - orders[e[0].stock_id, e[0].direction].append(e) - for (stock_id, direction), state in self.states.items(): - state.update(orders[stock_id, direction], self.trade_calendar, length=self.inner_trade_len) - - if not self.states: - return [] - - obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()]) - act = self.policy(obs_batch) - exec_vols = [self.action(a, s) for a, s in zip(act.act, self.states.values())] - return [create_sub_order(v, self.trade_calendar, o) for v, o in zip(exec_vols, self.outer_trade_decision)] - - -class RlWorkflow(NestedDecisonExecutionWorkflow): - - def tianshou(self): - self._init_qlib() - - # TODO: why is there a benchmark? 
- trade_start_time = "2017-01-01" - trade_end_time = "2020-08-01" - benchmark = "SH000300" - time_per_step = "day" - executor_config = { - "class": "SimulatorExecutor", - "module_path": "qlib.backtest.executor", - "kwargs": { - "time_per_step": time_per_step, - "verbose": True, - "generate_report": False, - } - } - exchange = get_exchange( - freq="day", - limit_threshold=0.095, - deal_price="close", - open_cost=0.0005, - close_cost=0.0015, - min_cost=5 - ) - - observation = Observation(time_per_step) - action = Action() - reward_fn = Reward() - - def dummy_env(): - executor = get_executor( - trade_start_time, - trade_end_time, - executor_config, - exchange, - benchmark, - 1000000000, - ) - return SingleOrderEnv( - observation, action, reward_fn, - iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor) - - policy = DummyPolicy() - - # This can not be replaced with SubprocVectorEnv - # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor - # p = Pool(processes=workers) - # AssertionError: daemonic processes are not allowed to have children - envs = DummyVectorEnv([dummy_env for _ in range(4)]) - test_collector = Collector(policy, envs) - policy.eval() - # TODO: create a queue for all orders and make it auto-complete when all the orders are processed - test_collector.collect(n_episode=10) - - def rl_day(self, load_model: Optional[str] = None): - self._init_qlib() - model = init_instance_by_config(self.task["model"]) - dataset = init_instance_by_config(self.task["dataset"]) - if load_model is None: - self._train_model(model, dataset) - else: - model = self._load_model(load_model) - trade_start_time = "2017-01-01" - trade_end_time = "2020-08-01" - trade_account = Account( - init_cash=int(1e9), - benchmark_config={ - "benchmark": "SH000300", - "start_time": trade_start_time, - "end_time": trade_end_time, - }, - ) - exchange = get_exchange( - freq="day", - limit_threshold=0.095, - deal_price="close", - open_cost=0.0005, 
- close_cost=0.0015, - min_cost=5 - ) - common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) - executor = NestedExecutor( - time_per_step="week", - inner_executor=SimulatorExecutor(time_per_step="day", verbose=True), - inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()), - common_infra=common_infra - ) - strategy = init_instance_by_config({ - "class": "TopkDropoutStrategy", - "module_path": "qlib.contrib.strategy.model_strategy", - "kwargs": { - "model": model, - "dataset": dataset, - "topk": 50, - "n_drop": 5, - }, - }, common_infra=common_infra) - report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor) - print(report_dict) - - -### This is a full RL strategy ### - - -class QlibOrderDataset(Dataset): - def __init__(self, order_file): - with open(order_file, 'rb') as f: - self.orders = pickle.load(f) - - def __len__(self): - return len(self.orders) - - def __getitem__(self, index) -> Order: - return self.orders[index] - - -class DummyPolicy(BasePolicy): - def forward(self, batch, state=None, **kwargs): - return Batch(act=np.random.randint(0, 5, size=(len(batch), ))) - - def learn(self, *args, **kwargs): - pass - - -class Observation: - def __init__(self, time_per_step): - self.time_per_step = time_per_step - - def __call__(self, ep_state: EpisodicState) -> Any: - obs = self.observe(ep_state) - if not self.validate(obs): - raise ValueError(f'Observation space does not contain obs. 
Space: {self.observation_space} Sample: {obs}') - return obs - - def validate(self, obs: Any) -> bool: - return self.observation_space.contains(obs) - - @property - def observation_space(self): - space = { - 'direction': spaces.Discrete(2), - 'cur_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32), - 'num_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32), - 'target': spaces.Box(-1e-5, np.inf, shape=()), - 'position': spaces.Box(-1e-5, np.inf, shape=()), - 'features': spaces.Box(-np.inf, np.inf, shape=(5, )) - } - return spaces.Dict(space) - - def observe(self, ep_state: EpisodicState) -> Any: - features = D.features( - [ep_state.stock_id], - ['$open', '$close', '$high', '$low', '$volume'], - start_time=ep_state.start_time, - end_time=ep_state.end_time, - freq=self.time_per_step - ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy() - features = np.nan_to_num(features) - return { - 'direction': _to_int32(ep_state.direction), - 'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)), - 'num_step': _to_int32(ep_state.num_step), - 'target': _to_float32(ep_state.target), - 'position': _to_float32(ep_state.position), - 'features': features, - } - - -class Action: - denominator = 4 - - @property - def action_space(self): - return spaces.Discrete(self.denominator + 1) - - def __call__(self, action: Any, ep_state: EpisodicState) -> Any: - if not self.validate(action): - raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}') - act_ = self.to_volume(action, ep_state) - return act_ - - def validate(self, action: Any) -> bool: - return self.action_space.contains(action) - - def to_volume(self, action: Any, ep_state: EpisodicState) -> Any: - exec_vol = ep_state.position / self.denominator * action - if ep_state.cur_step + 1 >= ep_state.num_step: - exec_vol = ep_state.position - # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit? 
- return exec_vol - - -class Reward: - weight = 1.0 - - def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: - rew, info = 0., {} - if ep_state.done: - ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state)) - rew += ep_rew - info.update({f'ep/{k}': v for k, v in ep_info.items()}) - st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state)) - rew += st_rew - info.update({f'st/{k}': v for k, v in st_info.items()}) - return rew * self.weight, info - - @staticmethod - def _to_tuple(x): - if isinstance(x, tuple): - return x - return x, {} - - def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]: - return 0. - - def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]: - assert ep_state.target > 0 - baseline_price = st_state.pa_twap - pa = baseline_price * st_state.exec_vol.sum() / ep_state.target - penalty = -100 * ((st_state.exec_vol / ep_state.target) ** 2).sum() # penalize too much volume at one step - reward = pa + penalty - return reward, {'pa': pa, 'penalty': penalty} - - -def _to_int32(val): return np.array(int(val), dtype=np.int32) -def _to_float32(val): return np.array(val, dtype=np.float32) - -### End of RL strategy ### - - -if __name__ == '__main__': - fire.Fire(RlWorkflow) diff --git a/examples/nested_decision_execution/workflow.py b/examples/nested_decision_execution/workflow.py index a90e7281c..b6c1362fd 100644 --- a/examples/nested_decision_execution/workflow.py +++ b/examples/nested_decision_execution/workflow.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from typing import Optional import qlib import fire @@ -171,17 +170,11 @@ class NestedDecisionExecutionWorkflow: sr = SignalRecord(model, dataset, recorder) sr.generate() - def _load_model(self, load): - return R.get_recorder(load, experiment_name="train").load_object("params.pkl") - - def backtest(self, load_model: Optional[str] = None): + def backtest(self): self._init_qlib() model = init_instance_by_config(self.task["model"]) dataset = init_instance_by_config(self.task["dataset"]) - if load_model is None: - self._train_model(model, dataset) - else: - model = self._load_model(load_model) + self._train_model(model, dataset) strategy_config = { "class": "TopkDropoutStrategy", "module_path": "qlib.contrib.strategy.model_strategy",