From c26bee126bc920654b5ab9526a90314bd835c595 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Fri, 28 May 2021 17:31:08 +0800
Subject: [PATCH 01/28] Support loading for backtest

---
 examples/multi_level_trading/workflow.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/examples/multi_level_trading/workflow.py b/examples/multi_level_trading/workflow.py
index 8096fc76f..2b70d4411 100644
--- a/examples/multi_level_trading/workflow.py
+++ b/examples/multi_level_trading/workflow.py
@@ -1,6 +1,7 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.
 
+from typing import Optional
 
 import qlib
 import fire
@@ -124,11 +125,17 @@ class MultiLevelTradingWorkflow:
             sr = SignalRecord(model, dataset, recorder)
             sr.generate()
 
-    def backtest(self):
+    def _load_model(self, load):
+        return R.get_recorder(load, experiment_name="train").load_object("params.pkl")
+
+    def backtest(self, load_model: Optional[str] = None):
         self._init_qlib()
         model = init_instance_by_config(self.task["model"])
         dataset = init_instance_by_config(self.task["dataset"])
-        self._train_model(model, dataset)
+        if load_model is None:
+            self._train_model(model, dataset)
+        else:
+            model = self._load_model(load_model)
         strategy_config = {
             "class": "TopkDropoutStrategy",
             "module_path": "qlib.contrib.strategy.model_strategy",

From d3dac068df5e21d54bb453bb1b9a3eaacf389a06 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Tue, 1 Jun 2021 11:33:44 +0800
Subject: [PATCH 02/28] Update simple playground

---
 qlib/strategy/__init__.py |   2 +
 qlib/strategy/base.py     |   2 +
 rl_playground.py          | 137 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 rl_playground.py

diff --git a/qlib/strategy/__init__.py b/qlib/strategy/__init__.py
index 59e481eb9..e3fcd8e26 100644
--- a/qlib/strategy/__init__.py
+++ b/qlib/strategy/__init__.py
@@ -1,2 +1,4 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+
+from .base import *
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 7828db609..37897da5a 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -7,6 +7,8 @@ from ..data.dataset.utils import convert_index_format
 from ..rl.interpreter import ActionInterpreter, StateInterpreter
 from ..utils import init_instance_by_config
 
+__all__ = ['BaseStrategy', 'ModelStrategy', 'RLStrategy', 'RLIntStrategy']
+
 
 class BaseStrategy:
     """Base strategy for trading"""
diff --git a/rl_playground.py b/rl_playground.py
new file mode 100644
index 000000000..3a4291495
--- /dev/null
+++ b/rl_playground.py
@@ -0,0 +1,137 @@
+import logging
+import pickle
+from enum import Enum
+from typing import Iterable, Optional, Any
+
+import gym
+import numpy as np
+
+import torch
+from torch.utils.data import Dataset
+
+from qlib.backtest import get_exchange, Account, BaseExecutor
+from qlib.rl.interpreter import StateInterpreter, ActionInterpreter
+from qlib.utils import init_instance_by_config
+
+
+def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}):
+    trade_account = Account(
+        init_cash=account,
+        benchmark_config={
+            "benchmark": benchmark,
+            "start_time": start_time,
+            "end_time": end_time,
+        },
+    )
+    trade_exchange = get_exchange(**exchange_kwargs)
+
+    common_infra = {
+        "trade_account": trade_account,
+        "trade_exchange": trade_exchange,
+    }
+
+    trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
+
+    return common_infra, trade_executor
+
+
+class QlibOrderDataset(Dataset):
+    def __init__(self, order_file):
+        with open(order_file, 'rb') as f:
+            self.orders = pickle.load(f)
+
+    def __len__(self):
+        return len(self.orders)
+
+    def __getitem__(self, index):
+        return self.orders[index]
+
+
+class OrderEnv(gym.Env):
+    def __init__(self,
+                 state_interpreter: StateInterpreter,
+                 action_interpreter: ActionInterpreter,
+                 reward: Any,
+                 dataloader: Iterable,
+                 executor: BaseExecutor):
+        self.action_interpreter = action_interpreter
+        self.state_interpreter = state_interpreter
+        self.reward = reward
+        self.dataloader = dataloader
+        self.executor = executor
+
+    @property
+    def action_space(self):
+        return self.action.action_space
+
+    @property
+    def observation_space(self):
+        return self.observation.observation_space
+
+    def reset(self):
+        try:
+            self.cur_order = next(self.dataloader)
+        except StopIteration:
+            self.dataloader = None
+            return None
+
+        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
+        self.level_infra = self.executor.get_level_infra()
+        self.execute_result = []
+
+        # TODO: how to fetch data after feature engineering?
+
+        # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass
+        return self.state_interpreter(self.cur_order, self.level_infra)
+
+    def step(self, action):
+        assert self.dataloader is not None
+
+        assert not self.executor.finished()
+
+        trade_decision = self.action_interpreter(action)
+        self.execute_result.extend(self.executor.execute(trade_decision))
+        reward, rew_info = self.reward()
+
+        done = self.executor.finished()
+        info = {
+            'action_history': self.action_history,
+            'category': self.ep_state.flow_dir.value,
+            'reward': rew_info
+        }
+        if self.ep_state.done:
+            info['logs'] = self.ep_state.logs()
+            info['index'] = {
+                'ins': self._sample.ins,
+                'date': self._sample.date
+            }
+
+        # TODO: how to collect metrics
+        return self.state_interpreter(self.cur_order, self.level_infra), reward, done, info
+
+
+def _main():
+    executor_config = {
+        "class": "SimulatorExecutor",
+        "module_path": "qlib.backtest.executor",
+        "kwargs": {
+            "time_per_step": "day",
+            "verbose": True,
+            "generate_report": True,
+        }
+    }
+    # TODO: why is there a benchmark?
+    trade_start_time = "2017-01-01"
+    trade_end_time = "2020-08-01"
+    benchmark = "SH000300"
+    executor = get_executor(
+        trade_start_time, trade_end_time, executor_config,
+        benchmark, 1000000000, exchange_kwargs={
+            "freq": "day",
+            "limit_threshold": 0.095,
+            "deal_price": "close",
+            "open_cost": 0.0005,
+            "close_cost": 0.0015,
+            "min_cost": 5,
+        }
+    )

From 449e3f40c88ea6acb9ad8884c2a52515bf54b5af Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Tue, 1 Jun 2021 17:51:29 +0800
Subject: [PATCH 03/28] Update init in backtest

---
 qlib/backtest/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index 33c2cb2d8..1d9e91bb3 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -6,6 +6,7 @@ from .exchange import Exchange
 from .executor import BaseExecutor
 from .backtest import backtest as backtest_func
 from .backtest import collect_data as data_generator
+from .order import Order
 
 from .utils import CommonInfrastructure
 from ..strategy.base import BaseStrategy

From 83535bff6af1e6b288f9d00110424b547afd55a5 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Tue, 1 Jun 2021 18:08:11 +0800
Subject: [PATCH 04/28] Playground checkpoint

---
 rl_playground.py | 307 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 263 insertions(+), 44 deletions(-)

diff --git a/rl_playground.py b/rl_playground.py
index 3a4291495..de1fb15dd 100644
--- a/rl_playground.py
+++ b/rl_playground.py
@@ -1,17 +1,20 @@
-import logging
 import pickle
-from enum import Enum
-from typing import Iterable, Optional, Any
+from dataclasses import dataclass
+from typing import Iterable, Any
 
-import gym
 import numpy as np
-
-import torch
-from torch.utils.data import Dataset
-
-from qlib.backtest import get_exchange, Account, BaseExecutor
+import gym
+import qlib
+from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order
+from qlib.config import REG_CN
+from qlib.data import D
 from qlib.rl.interpreter import StateInterpreter, ActionInterpreter
-from qlib.utils import init_instance_by_config
+from qlib.tests.data import GetData
+from qlib.utils import init_instance_by_config, exists_qlib_data
+from torch.utils.data import Dataset, DataLoader
+from tianshou.data import Batch, Collector
+from tianshou.env import DummyVectorEnv
+from tianshou.policy import BasePolicy
 
 
 def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}):
@@ -25,14 +28,10 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1
     )
     trade_exchange = get_exchange(**exchange_kwargs)
 
-    common_infra = {
-        "trade_account": trade_account,
-        "trade_exchange": trade_exchange,
-    }
-
+    common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange)
     trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
 
-    return common_infra, trade_executor
+    return trade_executor
 
 
 class QlibOrderDataset(Dataset):
@@ -47,19 +46,180 @@ class QlibOrderDataset(Dataset):
         return self.orders[index]
 
 
-class OrderEnv(gym.Env):
+class DummyCallable:
+    def __call__(self, *args, **kwargs):
+        if args:
+            return args[0]
+        if kwargs:
+            for v in kwargs.values():
+                return v
+
+
+class DummyPolicy(BasePolicy):
+    def forward(self, batch, state=None, **kwargs):
+        return Batch(act=0)
+
+    def learn(self, *args, **kwargs):
+        pass
+
+
+@dataclass
+class EpisodicState:
+    """
+    A simplified data structure for RL-related components to process observations and rewards
+    """
+    # requirements
+    start_time: int
+    end_time: int
+    num_step: int
+    time_per_step: int
+    target: float
+    target_limit: float
+    vol_limit: Optional[float]
+    flow_dir: int
+    market_price: np.ndarray
+    market_vol: np.ndarray
+
+    # agent state
+    cur_time: int = -1
+    cur_step: int = 0
+    done: bool = False
+    position: Optional[float] = None
+    exec_vol: Optional[np.ndarray] = None
+    last_step_duration: Optional[int] = None
+    position_history: Optional[np.ndarray] = None
+
+    # calculated statistics
+    turnover: Optional[float] = None
+    baseline_twap: Optional[float] = None
+    baseline_vwap: Optional[float] = None
+    exec_avg_price: Optional[float] = None
+    pa_twap: Optional[float] = None
+    pa_vwap: Optional[float] = None
+    fulfill_rate: Optional[float] = None
+
+    def __post_init__(self):
+        assert self.target >= 0
+        self.cur_time = self.start_time
+        self.position = self.target
+        self.position_history = np.full((self.num_step + 1), np.nan)
+        self.position_history[0] = self.position
+        self.baseline_twap = np.mean(self.market_price)
+        if self.market_vol.sum() == 0:
+            self.baseline_vwap = np.mean(self.market_price)
+        else:
+            self.baseline_vwap = np.average(self.market_price, weights=self.market_vol)
+
+    def update_stats(self):
+        market_price = self.market_price[:len(self.exec_vol)]
+        self.turnover = (self.exec_vol * market_price).sum()
+        # exec_vol can be zero
+        if np.isclose(self.exec_vol.sum(), 0):
+            self.exec_avg_price = market_price[0]
+        else:
+            self.exec_avg_price = np.average(market_price, weights=self.exec_vol)
+        self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.flow_dir)
+        self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.flow_dir)
+        self.fulfill_rate = (self.target - self.position) / self.target_limit
+        if abs(self.fulfill_rate - 1.0) < EPSILON:
+            self.fulfill_rate = 1.0
+        self.fulfill_rate *= 100
+
+    def logs(self):
+        logs = {
+            'stop_time': self.cur_time - self.start_time,
+            'stop_step': self.cur_step,
+            'turnover': self.turnover,
+            'baseline_twap': self.baseline_twap,
+            'baseline_vwap': self.baseline_vwap,
+            'exec_avg_price': self.exec_avg_price,
+            'pa_twap': self.pa_twap,
+            'pa_vwap': self.pa_vwap,
+            'ffr': self.fulfill_rate
+        }
+        return logs
+
+    def next_duration(self) -> int:
+        return min(self.time_per_step, self.end_time - self.cur_time)
+
+    def step(self, exec_vol):
+        self.last_step_duration = len(exec_vol)
+        self.position -= exec_vol.sum()
+        assert self.position > -EPSILON and (exec_vol > -EPSILON).all(), \
+            f'Execution volume is invalid: {exec_vol} (position = {self.position})'
+        self.position_history[self.cur_step + 1] = self.position
+        self.cur_time += self.last_step_duration
+        self.cur_step += 1
+        if self.cur_step == self.num_step:
+            assert self.cur_time == self.end_time
+        if self.exec_vol is None:
+            self.exec_vol = exec_vol
+        else:
+            self.exec_vol = np.concatenate((self.exec_vol, exec_vol))
+
+        self.done = self.position < EPSILON or self.cur_step == self.num_step
+        if self.done:
+            self.update_stats()
+
+        l, r = self.cur_time - self.last_step_duration - self.start_time, self.cur_time - self.start_time
+        assert 0 <= l < r
+        return StepState(self.exec_vol[l:r], self.market_vol[l:r], self.market_price[l:r], self)
+
+
+@dataclass
+class StepState:
+    exec_vol: np.ndarray
+    market_vol: np.ndarray
+    market_price: np.ndarray
+
+    # episode info
+    episode_state: EpisodicState
+
+    # calculated statistics
+    turnover: Optional[float] = None
+    exec_avg_price: Optional[float] = None
+    pa_twap: Optional[float] = None
+    pa_vwap: Optional[float] = None
+
+    def __post_init__(self):
+        assert len(self.exec_vol) == len(self.market_price) == len(self.market_vol)
+        self.turnover = (self.exec_vol * self.market_price).sum()
+        if np.isclose(self.market_vol.sum(), 0):
+            self.exec_avg_price = self.market_price[0]
+        else:
+            self.exec_avg_price = np.average(self.market_price, weights=self.market_vol)
+        self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap,
+                                       self.episode_state.flow_dir)
+        self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap,
+                                       self.episode_state.flow_dir)
+
+
+def price_advantage(exec_price: float, baseline_price: float, flow: FlowDirection) -> float:
+    if baseline_price == 0:
+        return 0.
+    if flow == FlowDirection.ACQUIRE:
+        return (1 - exec_price / baseline_price) * 10000
+    else:
+        return (exec_price / baseline_price - 1) * 10000
+
+
+
+class SingleOrderEnv(gym.Env):
+    MAX_STEPS = 10
     def __init__(self,
-                 state_interpreter: StateInterpreter,
-                 action_interpreter: ActionInterpreter,
+                 observation: StateInterpreter,
+                 action: ActionInterpreter,
                  reward: Any,
                  dataloader: Iterable,
                  executor: BaseExecutor):
-        self.action_interpreter = action_interpreter
-        self.state_interpreter = state_interpreter
+        self.action = action
+        self.observation = observation
         self.reward = reward
         self.dataloader = dataloader
         self.executor = executor
 
+        self.inner_frequency = self.executor.get_all_executor()[-1].time_per_step
+
     @property
     def action_space(self):
         return self.action.action_space
@@ -68,32 +228,53 @@ class OrderEnv(gym.Env):
     def observation_space(self):
         return self.observation.observation_space
 
+    def retrieve_data(self, cur_order: Order):
+        return D.features(
+            [cur_order.stock_id],
+            ['$open', '$close', '$high', '$low', '$volume'],
+            start_time=cur_order.start_time.date(),
+            end_time=cur_order.end_time.date(),
+            freq=self.inner_frequency
+        )
+
+    def initialize_state(self):
+        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
+        return EpisodicState()
+
+    def update_state(self, action):
+        trade_decision = action
+        execute_result = self.executor.execute(trade_decision)
+
     def reset(self):
         try:
-            self.cur_order = next(self.dataloader)
+            cur_order = next(self.dataloader)
         except StopIteration:
             self.dataloader = None
             return None
 
-        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
-        self.level_infra = self.executor.get_level_infra()
+        self.cur_sample = self._retrieve_data(cur_order)
         self.execute_result = []
+        self.ep_state = self.initialize_state()
+
+        self.action_history = np.full(self.MAX_STEPS, np.nan)
+        return self.observation(self.cur_sample, self.ep_state)
+
 
         # TODO: how to fetch data after feature engineering?
 
         # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass
-        return self.state_interpreter(self.cur_order, self.level_infra)
+        return self.observation
 
     def step(self, action):
         assert self.dataloader is not None
 
         assert not self.executor.finished()
 
-        trade_decision = self.action_interpreter(action)
-        self.execute_result.extend(self.executor.execute(trade_decision))
-        reward, rew_info = self.reward()
+        exec_vol = self.action(action, self.ep_state)
+        step_state = self.ep_state.step(exec_vol)
+
+        reward, rew_info = self.reward(self.ep_state, step_state)
 
-        done = self.executor.finished()
         info = {
             'action_history': self.action_history,
             'category': self.ep_state.flow_dir.value,
@@ -102,31 +283,45 @@ class OrderEnv(gym.Env):
         if self.ep_state.done:
             info['logs'] = self.ep_state.logs()
             info['index'] = {
-                'ins': self._sample.ins,
-                'date': self._sample.date
+                'ins': self.cur_sample.ins,
+                'date': self.cur_sample.date
             }
 
-        # TODO: how to collect metrics
-        return self.state_interpreter(self.cur_order, self.level_infra), reward, done, info
+        return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info
+
+
+def _init_qlib():
+    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
+    if not exists_qlib_data(provider_uri):
+        print(f"Qlib data is not found in {provider_uri}")
+        GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
+    qlib.init(provider_uri=provider_uri, region=REG_CN)
 
 
 def _main():
-    executor_config = {
-        "class": "SimulatorExecutor",
-        "module_path": "qlib.backtest.executor",
-        "kwargs": {
-            "time_per_step": "day",
-            "verbose": True,
-            "generate_report": True,
-        }
-    }
+    _init_qlib()
+
     # TODO: why is there a benchmark?
     trade_start_time = "2017-01-01"
     trade_end_time = "2020-08-01"
     benchmark = "SH000300"
+    time_per_step = "day"
+    executor_config = {
+        "class": "SimulatorExecutor",
+        "module_path": "qlib.backtest.executor",
+        "kwargs": {
+            "time_per_step": time_per_step,
+            "verbose": True,
+            "generate_report": False,
+        }
+    }
     executor = get_executor(
-        trade_start_time, trade_end_time, executor_config,
-        benchmark, 1000000000, exchange_kwargs={
+        trade_start_time,
+        trade_end_time,
+        executor_config,
+        benchmark,
+        1000000000,
+        exchange_kwargs={
             "freq": "day",
             "limit_threshold": 0.095,
             "deal_price": "close",
@@ -135,3 +330,27 @@ def _main():
             "min_cost": 5,
         }
     )
+
+    import pdb; pdb.set_trace()
+
+    observation = DummyCallable()
+    action = DummyCallable()
+    reward_fn = DummyCallable()
+    # TODO: this probably won't work with multiprocess
+    dataloader = iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True))
+
+    def dummy_env(): return OrderEnv(observation, action, reward_fn, dataloader, executor)
+    policy = DummyPolicy()
+
+    # env = dummy_env()
+    # obs = env.reset()
+    # print(obs.__dict__)
+
+    envs = DummyVectorEnv([dummy_env for _ in range(4)])
+    test_collector = Collector(policy, envs)
+    policy.eval()
+    test_collector.collect(n_episode=10)
+
+
+if __name__ == '__main__':
+    _main()

From 3200bb88c85a754b0282832741e3e0a2258e88b1 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 15:11:38 +0800
Subject: [PATCH 05/28] Update an initial version of RL

---
 rl_playground.py | 293 +++++++++++++++++++++++++++++++----------------
 1 file changed, 194 insertions(+), 99 deletions(-)

diff --git a/rl_playground.py b/rl_playground.py
index de1fb15dd..cac9134c6 100644
--- a/rl_playground.py
+++ b/rl_playground.py
@@ -1,10 +1,12 @@
 import pickle
-from dataclasses import dataclass
-from typing import Iterable, Any
+from dataclasses import dataclass, asdict
+from typing import Iterable, Any, Optional, Tuple, Dict
 
-import numpy as np
 import gym
+import numpy as np
+import pandas as pd
 import qlib
+from gym import spaces
 from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order
 from qlib.config import REG_CN
 from qlib.data import D
@@ -17,7 +19,10 @@ from tianshou.env import DummyVectorEnv
 from tianshou.policy import BasePolicy
 
 
-def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}):
+MAX_STEPS = 10
+
+
+def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}) -> BaseExecutor:
     trade_account = Account(
         init_cash=account,
         benchmark_config={
@@ -34,6 +39,19 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1
     return trade_executor
 
 
+def price_advantage(exec_price: float, baseline_price: float, direction: int) -> float:
+    if baseline_price == 0:
+        return 0.
+    if direction == 1:
+        return (1 - exec_price / baseline_price) * 10000
+    else:
+        return (exec_price / baseline_price - 1) * 10000
+
+
+def _to_int32(val): return np.array(int(val), dtype=np.int32)
+def _to_float32(val): return np.array(val, dtype=np.float32)
+
+
 class QlibOrderDataset(Dataset):
     def __init__(self, order_file):
         with open(order_file, 'rb') as f:
@@ -46,18 +64,10 @@ class QlibOrderDataset(Dataset):
         return self.orders[index]
 
 
-class DummyCallable:
-    def __call__(self, *args, **kwargs):
-        if args:
-            return args[0]
-        if kwargs:
-            for v in kwargs.values():
-                return v
-
-
 class DummyPolicy(BasePolicy):
     def forward(self, batch, state=None, **kwargs):
-        return Batch(act=0)
+        print(batch)
+        return Batch(act=np.random.randint(5))
 
     def learn(self, *args, **kwargs):
         pass
@@ -69,20 +79,22 @@ class EpisodicState:
     A simplified data structure for RL-related components to process observations and rewards
     """
     # requirements
-    start_time: int
-    end_time: int
-    num_step: int
-    time_per_step: int
+    stock_id: int
+    start_time: pd.Timestamp
+    end_time: pd.Timestamp
+    direction: int
     target: float
-    target_limit: float
-    vol_limit: Optional[float]
-    flow_dir: int
+    num_step: int
+
+    # simplified market data used to calculate backtest metrics
+    # this may contains information from future so be careful
     market_price: np.ndarray
     market_vol: np.ndarray
 
     # agent state
-    cur_time: int = -1
+    cur_time: Optional[pd.Timestamp] = None
     cur_step: int = 0
+    cur_tick: int = 0  # tick is the most fine-grained time unit (typically minute)
     done: bool = False
     position: Optional[float] = None
     exec_vol: Optional[np.ndarray] = None
@@ -100,6 +112,7 @@ class EpisodicState:
 
     def __post_init__(self):
         assert self.target >= 0
+        assert len(self.market_price) == len(self.market_vol)
         self.cur_time = self.start_time
         self.position = self.target
         self.position_history = np.full((self.num_step + 1), np.nan)
@@ -118,10 +131,10 @@ class EpisodicState:
             self.exec_avg_price = market_price[0]
         else:
             self.exec_avg_price = np.average(market_price, weights=self.exec_vol)
-        self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.flow_dir)
-        self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.flow_dir)
-        self.fulfill_rate = (self.target - self.position) / self.target_limit
-        if abs(self.fulfill_rate - 1.0) < EPSILON:
+        self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.direction)
+        self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.direction)
+        self.fulfill_rate = (self.target - self.position) / self.target
+        if abs(self.fulfill_rate - 1.0) < 1e-5:
             self.fulfill_rate = 1.0
         self.fulfill_rate *= 100
 
@@ -139,35 +152,10 @@ class EpisodicState:
         }
         return logs
 
-    def next_duration(self) -> int:
-        return min(self.time_per_step, self.end_time - self.cur_time)
-
-    def step(self, exec_vol):
-        self.last_step_duration = len(exec_vol)
-        self.position -= exec_vol.sum()
-        assert self.position > -EPSILON and (exec_vol > -EPSILON).all(), \
-            f'Execution volume is invalid: {exec_vol} (position = {self.position})'
-        self.position_history[self.cur_step + 1] = self.position
-        self.cur_time += self.last_step_duration
-        self.cur_step += 1
-        if self.cur_step == self.num_step:
-            assert self.cur_time == self.end_time
-        if self.exec_vol is None:
-            self.exec_vol = exec_vol
-        else:
-            self.exec_vol = np.concatenate((self.exec_vol, exec_vol))
-
-        self.done = self.position < EPSILON or self.cur_step == self.num_step
-        if self.done:
-            self.update_stats()
-
-        l, r = self.cur_time - self.last_step_duration - self.start_time, self.cur_time - self.start_time
-        assert 0 <= l < r
-        return StepState(self.exec_vol[l:r], self.market_vol[l:r], self.market_price[l:r], self)
-
 
 @dataclass
 class StepState:
+    # market info and execution volume for current step
     exec_vol: np.ndarray
     market_vol: np.ndarray
     market_price: np.ndarray
@@ -189,23 +177,109 @@ class StepState:
         else:
             self.exec_avg_price = np.average(self.market_price, weights=self.market_vol)
         self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap,
-                                       self.episode_state.flow_dir)
+                                       self.episode_state.direction)
         self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap,
-                                       self.episode_state.flow_dir)
+                                       self.episode_state.direction)
 
 
-def price_advantage(exec_price: float, baseline_price: float, flow: FlowDirection) -> float:
-    if baseline_price == 0:
+class Observation:
+    def __init__(self, time_per_step):
+        self.time_per_step = time_per_step
+
+    def __call__(self, ep_state: EpisodicState) -> Any:
+        obs = self.observe(ep_state)
+        if not self.validate(obs):
+            raise ValueError(f'Observation space does not contain obs. Space: {self.observation_space} Sample: {obs}')
+        return obs
+
+    def validate(self, obs: Any) -> bool:
+        return self.observation_space.contains(obs)
+
+    @property
+    def observation_space(self):
+        space = {
+            'direction': spaces.Discrete(2),
+            'cur_step': spaces.Box(0, MAX_STEPS - 1, shape=(), dtype=np.int32),
+            'num_step': spaces.Box(MAX_STEPS, MAX_STEPS, shape=(), dtype=np.int32),
+            'target': spaces.Box(-1e-5, np.inf, shape=()),
+            'position': spaces.Box(-1e-5, np.inf, shape=()),
+            'features': spaces.Box(-np.inf, np.inf, shape=(5, ))
+        }
+        return spaces.Dict(space)
+
+    def observe(self, ep_state: EpisodicState) -> Any:
+        return {
+            'acquiring': _to_int32(ep_state.direction),
+            'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)),
+            'num_step': _to_int32(ep_state.num_step),
+            'target': _to_float32(ep_state.target),
+            'position': _to_float32(ep_state.position),
+            'features': D.features(
+                [ep_state.stock_id],
+                ['$open', '$close', '$high', '$low', '$volume'],
+                start_time=ep_state.start_time,
+                end_time=ep_state.end_time,
+                freq=self.time_per_step
+            )
+        }
+
+
+class Action:
+    @property
+    def action_space(self):
+        return spaces.Discrete(5)
+
+    def __call__(self, action: Any, ep_state: EpisodicState) -> Any:
+        if not self.validate(action):
+            raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}')
+        act_ = self.to_volume(action, ep_state)
+        return act_
+
+    def validate(self, action: Any) -> bool:
+        return self.action_space.contains(action)
+
+    def to_volume(self, action: Any, ep_state: EpisodicState):
+        exec_vol = ep_state.position / 5 * action
+        if ep_state.cur_step + 1 >= ep_state.num_step:
+            exec_vol = ep_state.position
+        # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit?
+        return exec_vol
+
+
+class Reward:
+    weight = 1.0
+
+    def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
+        rew, info = 0., {}
+        if ep_state.done:
+            ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state))
+            rew += ep_rew
+            info.update({f'ep/{k}': v for k, v in ep_info.items()})
+        st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state))
+        rew += st_rew
+        info.update({f'st/{k}': v for k, v in st_info.items()})
+        return rew * self.weight, info
+
+    @staticmethod
+    def _to_tuple(x):
+        if isinstance(x, tuple):
+            return x
+        return x, {}
+
+    def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]:
         return 0.
-    if flow == FlowDirection.ACQUIRE:
-        return (1 - exec_price / baseline_price) * 10000
-    else:
-        return (exec_price / baseline_price - 1) * 10000
+
+    def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
+        assert ep_state.target > 0
+        baseline_price = st_state.pa_twap
+        pa = baseline_price * st_state.exec_vol.sum() / ep_state.target
+        penalty = -self.penalty * ((st_state.exec_vol / ep_state.target) ** 2).sum()
+        reward = pa + penalty
+        return reward, {'pa': pa, 'penalty': penalty}
 
 
 
 class SingleOrderEnv(gym.Env):
-    MAX_STEPS = 10
     def __init__(self,
                  observation: StateInterpreter,
                  action: ActionInterpreter,
@@ -228,50 +302,73 @@ class SingleOrderEnv(gym.Env):
     def observation_space(self):
         return self.observation.observation_space
 
-    def retrieve_data(self, cur_order: Order):
+    def retrieve_backtest_data(self, field: str):
         return D.features(
-            [cur_order.stock_id],
+            [self.cur_order.stock_id],
             ['$open', '$close', '$high', '$low', '$volume'],
-            start_time=cur_order.start_time.date(),
-            end_time=cur_order.end_time.date(),
+            start_time=self.cur_order.start_time,
+            end_time=self.cur_order.end_time,
             freq=self.inner_frequency
-        )
+        )[field].to_numpy()
 
     def initialize_state(self):
         self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
-        return EpisodicState()
+        return EpisodicState(
+            stock_id=self.cur_order.stock_id,
+            start_time=self.cur_order.start_time,
+            end_time=self.cur_order.end_time,
+            direction=self.cur_order.direction,
+            target=self.cur_order.amount,
+            num_step=self.executor.trade_calendar.get_trade_len(),
+            market_price=self.retrieve_backtest_data('$close'),
+            market_vol=self.retrieve_backtest_data('$volume'),
+        )
 
-    def update_state(self, action):
-        trade_decision = action
-        execute_result = self.executor.execute(trade_decision)
+    def update_state(self, exec_vol):
+        trade_step = self.trade_calendar.get_trade_step()
+        trade_start_time = self.executor.trade_calendar.get_step_time(trade_step)
+        trade_end_time = self.executor.trade_calendar.get_step_time(trade_step, shift=1)
+        trade_decision = Order(**asdict(self.cur_order),
+            start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
+        execute_result = self.executor.execute([trade_decision])
+        cur_tick = self.ep_state.cur_tick
+
+        inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
+        ticks_this_step = len(inner_exec_vol)
+        state = self.ep_state
+        state.cur_step = trade_step = self.executor.trade_calendar.get_trade_step()
+        state.cur_time = self.executor.trade_calendar.get_step_time(trade_step)
+        state.cur_tick += ticks_this_step
+        state.position -= np.sum(inner_exec_vol)
+        state.position_history[trade_step] = state.position
+        state.exec_vol = inner_exec_vol if state.exec_vol is None else np.concatenate((state.exec_vol, inner_exec_vol))
+
+        state.done = self.executor.finished()
+        if state.done:
+            state.update_stats()
+
+        l, r = cur_tick, cur_tick + ticks_this_step
+        assert 0 <= l < r
+        return StepState(inner_exec_vol, state.market_vol[l:r], state.market_price[l:r], state)
 
     def reset(self):
         try:
-            cur_order = next(self.dataloader)
+            self.cur_order = next(self.dataloader)
         except StopIteration:
             self.dataloader = None
             return None
 
-        self.cur_sample = self._retrieve_data(cur_order)
         self.execute_result = []
         self.ep_state = self.initialize_state()
 
-        self.action_history = np.full(self.MAX_STEPS, np.nan)
+        self.action_history = np.full(self.ep_state.num_step, np.nan)
         return self.observation(self.cur_sample, self.ep_state)
 
-
-        # TODO: how to fetch data after feature engineering?
-
-        # TODO: can be rewritten as dataclasses.asdict(self.cur_order) is Order is written to be a dataclass
-        return self.observation
-
     def step(self, action):
         assert self.dataloader is not None
 
-        assert not self.executor.finished()
-
         exec_vol = self.action(action, self.ep_state)
-        step_state = self.ep_state.step(exec_vol)
+        step_state = self.update_state(exec_vol)
 
         reward, rew_info = self.reward(self.ep_state, step_state)
 
@@ -283,8 +380,8 @@ class SingleOrderEnv(gym.Env):
         if self.ep_state.done:
             info['logs'] = self.ep_state.logs()
             info['index'] = {
-                'ins': self.cur_sample.ins,
-                'date': self.cur_sample.date
+                'ins': self.ep_state.stock_id,
+                'date': self.ep_state.start_time,
             }
 
         return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info
@@ -331,25 +428,23 @@ def _main():
         }
     )
 
-    import pdb; pdb.set_trace()
+    observation = Observation(time_per_step)
+    action = Action()
+    reward_fn = Reward()
 
-    observation = DummyCallable()
-    action = DummyCallable()
-    reward_fn = DummyCallable()
-    # TODO: this probably won't work with multiprocess
-    dataloader = iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True))
-
-    def dummy_env(): return OrderEnv(observation, action, reward_fn, dataloader, executor)
+    def dummy_env(): return SingleOrderEnv(
+        observation, action, reward_fn,
+        DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True), executor)
     policy = DummyPolicy()
 
-    # env = dummy_env()
-    # obs = env.reset()
-    # print(obs.__dict__)
+    env = dummy_env()
+    obs = env.reset()
+    print(obs)
 
-    envs = DummyVectorEnv([dummy_env for _ in range(4)])
-    test_collector = Collector(policy, envs)
-    policy.eval()
-    test_collector.collect(n_episode=10)
+    # envs = DummyVectorEnv([dummy_env for _ in range(4)])
+    # test_collector = Collector(policy, envs)
+    # policy.eval()
+    # test_collector.collect(n_episode=10)
 
 
 if __name__ == '__main__':

From d515efb46e069a7334e5ee26cebb6a3adffc7908 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 16:41:18 +0800
Subject: [PATCH 06/28] Finish RL dummy example

---
 qlib/backtest/order.py |   4 +-
 rl_playground.py       | 345 +++++++++++++++++++++--------------------
 2 files changed, 183 insertions(+), 166 deletions(-)

diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index e4bf41f1e..47a859aa3 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 import pandas as pd
 from dataclasses import dataclass, field
-from typing import ClassVar
+from typing import ClassVar, Optional
 
 
 @dataclass
@@ -26,7 +26,7 @@ class Order:
     end_time: pd.Timestamp
     direction: int
     factor: float
-    deal_amount: float = field(init=False)
+    deal_amount: Optional[float] = None
     SELL: ClassVar[int] = 0
     BUY: ClassVar[int] = 1
 
diff --git a/rl_playground.py b/rl_playground.py
index cac9134c6..482615215 100644
--- a/rl_playground.py
+++ b/rl_playground.py
@@ -1,5 +1,6 @@
 import pickle
 from dataclasses import dataclass, asdict
+from pprint import pprint
 from typing import Iterable, Any, Optional, Tuple, Dict
 
 import gym
@@ -22,7 +23,7 @@ from tianshou.policy import BasePolicy
 MAX_STEPS = 10
 
 
-def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1e9, exchange_kwargs={}) -> BaseExecutor:
+def get_executor(start_time, end_time, executor, exchange, benchmark="SH000300", account=1e9) -> BaseExecutor:
     trade_account = Account(
         init_cash=account,
         benchmark_config={
@@ -31,9 +32,8 @@ def get_executor(start_time, end_time, executor, benchmark="SH000300", account=1
             "end_time": end_time,
         },
     )
-    trade_exchange = get_exchange(**exchange_kwargs)
 
-    common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange)
+    common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
     trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
 
     return trade_executor
@@ -48,31 +48,6 @@ def price_advantage(exec_price: float, baseline_price: float, direction: int) ->
         return (exec_price / baseline_price - 1) * 10000
 
 
-def _to_int32(val): return np.array(int(val), dtype=np.int32)
-def _to_float32(val): return np.array(val, dtype=np.float32)
-
-
-class QlibOrderDataset(Dataset):
-    def __init__(self, order_file):
-        with open(order_file, 'rb') as f:
-            self.orders = pickle.load(f)
-
-    def __len__(self):
-        return len(self.orders)
-
-    def __getitem__(self, index):
-        return self.orders[index]
-
-
-class DummyPolicy(BasePolicy):
-    def forward(self, batch, state=None, **kwargs):
-        print(batch)
-        return Batch(act=np.random.randint(5))
-
-    def learn(self, *args, **kwargs):
-        pass
-
-
 @dataclass
 class EpisodicState:
     """
@@ -182,103 +157,6 @@ class StepState:
                                        self.episode_state.direction)
 
 
-class Observation:
-    def __init__(self, time_per_step):
-        self.time_per_step = time_per_step
-
-    def __call__(self, ep_state: EpisodicState) -> Any:
-        obs = self.observe(ep_state)
-        if not self.validate(obs):
-            raise ValueError(f'Observation space does not contain obs. Space: {self.observation_space} Sample: {obs}')
-        return obs
-
-    def validate(self, obs: Any) -> bool:
-        return self.observation_space.contains(obs)
-
-    @property
-    def observation_space(self):
-        space = {
-            'direction': spaces.Discrete(2),
-            'cur_step': spaces.Box(0, MAX_STEPS - 1, shape=(), dtype=np.int32),
-            'num_step': spaces.Box(MAX_STEPS, MAX_STEPS, shape=(), dtype=np.int32),
-            'target': spaces.Box(-1e-5, np.inf, shape=()),
-            'position': spaces.Box(-1e-5, np.inf, shape=()),
-            'features': spaces.Box(-np.inf, np.inf, shape=(5, ))
-        }
-        return spaces.Dict(space)
-
-    def observe(self, ep_state: EpisodicState) -> Any:
-        return {
-            'acquiring': _to_int32(ep_state.direction),
-            'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)),
-            'num_step': _to_int32(ep_state.num_step),
-            'target': _to_float32(ep_state.target),
-            'position': _to_float32(ep_state.position),
-            'features': D.features(
-                [ep_state.stock_id],
-                ['$open', '$close', '$high', '$low', '$volume'],
-                start_time=ep_state.start_time,
-                end_time=ep_state.end_time,
-                freq=self.time_per_step
-            )
-        }
-
-
-class Action:
-    @property
-    def action_space(self):
-        return spaces.Discrete(5)
-
-    def __call__(self, action: Any, ep_state: EpisodicState) -> Any:
-        if not self.validate(action):
-            raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}')
-        act_ = self.to_volume(action, ep_state)
-        return act_
-
-    def validate(self, action: Any) -> bool:
-        return self.action_space.contains(action)
-
-    def to_volume(self, action: Any, ep_state: EpisodicState):
-        exec_vol = ep_state.position / 5 * action
-        if ep_state.cur_step + 1 >= ep_state.num_step:
-            exec_vol = ep_state.position
-        # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit?
-        return exec_vol
-
-
-class Reward:
-    weight = 1.0
-
-    def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
-        rew, info = 0., {}
-        if ep_state.done:
-            ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state))
-            rew += ep_rew
-            info.update({f'ep/{k}': v for k, v in ep_info.items()})
-        st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state))
-        rew += st_rew
-        info.update({f'st/{k}': v for k, v in st_info.items()})
-        return rew * self.weight, info
-
-    @staticmethod
-    def _to_tuple(x):
-        if isinstance(x, tuple):
-            return x
-        return x, {}
-
-    def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]:
-        return 0.
-
-    def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
-        assert ep_state.target > 0
-        baseline_price = st_state.pa_twap
-        pa = baseline_price * st_state.exec_vol.sum() / ep_state.target
-        penalty = -self.penalty * ((st_state.exec_vol / ep_state.target) ** 2).sum()
-        reward = pa + penalty
-        return reward, {'pa': pa, 'penalty': penalty}
-
-
-
 class SingleOrderEnv(gym.Env):
     def __init__(self,
                  observation: StateInterpreter,
@@ -313,7 +191,7 @@ class SingleOrderEnv(gym.Env):
 
     def initialize_state(self):
         self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
-        return EpisodicState(
+        state = EpisodicState(
             stock_id=self.cur_order.stock_id,
             start_time=self.cur_order.start_time,
             end_time=self.cur_order.end_time,
@@ -323,29 +201,37 @@ class SingleOrderEnv(gym.Env):
             market_price=self.retrieve_backtest_data('$close'),
             market_vol=self.retrieve_backtest_data('$volume'),
         )
+        state.cur_step = self.executor.trade_calendar.get_trade_step()
+        assert state.cur_step == 0
+        state.cur_time, _ = self.executor.trade_calendar.get_step_time(state.cur_step)
+        return state
 
     def update_state(self, exec_vol):
-        trade_step = self.trade_calendar.get_trade_step()
-        trade_start_time = self.executor.trade_calendar.get_step_time(trade_step)
-        trade_end_time = self.executor.trade_calendar.get_step_time(trade_step, shift=1)
-        trade_decision = Order(**asdict(self.cur_order),
-            start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
+        calendar = self.executor.trade_calendar
+        state = self.ep_state
+
+        trade_step = calendar.get_trade_step()
+        trade_start_time, trade_end_time = calendar.get_step_time(trade_step)
+        order_kwargs = asdict(self.cur_order)
+        order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
+        trade_decision = Order(**order_kwargs)
         execute_result = self.executor.execute([trade_decision])
-        cur_tick = self.ep_state.cur_tick
+        cur_tick = state.cur_tick
 
         inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
         ticks_this_step = len(inner_exec_vol)
-        state = self.ep_state
-        state.cur_step = trade_step = self.executor.trade_calendar.get_trade_step()
-        state.cur_time = self.executor.trade_calendar.get_step_time(trade_step)
+        state.cur_step = trade_step = calendar.get_trade_step()
         state.cur_tick += ticks_this_step
         state.position -= np.sum(inner_exec_vol)
         state.position_history[trade_step] = state.position
-        state.exec_vol = inner_exec_vol if state.exec_vol is None else np.concatenate((state.exec_vol, inner_exec_vol))
-
         state.done = self.executor.finished()
+        state.exec_vol = inner_exec_vol if state.exec_vol is None else \
+            np.concatenate((state.exec_vol, inner_exec_vol))
+
         if state.done:
             state.update_stats()
+        else:
+            state.cur_time, _ = calendar.get_step_time(trade_step)
 
         l, r = cur_tick, cur_tick + ticks_this_step
         assert 0 <= l < r
@@ -362,19 +248,23 @@ class SingleOrderEnv(gym.Env):
         self.ep_state = self.initialize_state()
 
         self.action_history = np.full(self.ep_state.num_step, np.nan)
-        return self.observation(self.cur_sample, self.ep_state)
+        return self.observation(self.ep_state)
 
     def step(self, action):
         assert self.dataloader is not None
+        assert not self.executor.finished()
+        self.action_history[self.ep_state.cur_step] = action
 
         exec_vol = self.action(action, self.ep_state)
         step_state = self.update_state(exec_vol)
+        if self.executor.finished():
+            assert self.ep_state.done
 
         reward, rew_info = self.reward(self.ep_state, step_state)
 
         info = {
             'action_history': self.action_history,
-            'category': self.ep_state.flow_dir.value,
+            'category': self.ep_state.direction,
             'reward': rew_info
         }
         if self.ep_state.done:
@@ -383,8 +273,9 @@ class SingleOrderEnv(gym.Env):
                 'ins': self.ep_state.stock_id,
                 'date': self.ep_state.start_time,
             }
+            pprint(info)
 
-        return self.observation(self.cur_sample, self.ep_state), reward, self.ep_state.done, info
+        return self.observation(self.ep_state), reward, self.ep_state.done, info
 
 
 def _init_qlib():
@@ -412,39 +303,165 @@ def _main():
             "generate_report": False,
         }
     }
-    executor = get_executor(
-        trade_start_time,
-        trade_end_time,
-        executor_config,
-        benchmark,
-        1000000000,
-        exchange_kwargs={
-            "freq": "day",
-            "limit_threshold": 0.095,
-            "deal_price": "close",
-            "open_cost": 0.0005,
-            "close_cost": 0.0015,
-            "min_cost": 5,
-        }
+    exchange = get_exchange(
+        freq="day",
+        limit_threshold=0.095,
+        deal_price="close",
+        open_cost=0.0005,
+        close_cost=0.0015,
+        min_cost=5
     )
 
     observation = Observation(time_per_step)
     action = Action()
     reward_fn = Reward()
 
-    def dummy_env(): return SingleOrderEnv(
-        observation, action, reward_fn,
-        DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True), executor)
+    def dummy_env():
+        executor = get_executor(
+            trade_start_time,
+            trade_end_time,
+            executor_config,
+            exchange,
+            benchmark,
+            1000000000,
+        )
+        return SingleOrderEnv(
+            observation, action, reward_fn,
+            iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)), executor)
+
     policy = DummyPolicy()
 
-    env = dummy_env()
-    obs = env.reset()
-    print(obs)
+    envs = DummyVectorEnv([dummy_env for _ in range(4)])
+    test_collector = Collector(policy, envs)
+    policy.eval()
+    test_collector.collect(n_episode=10)
 
-    # envs = DummyVectorEnv([dummy_env for _ in range(4)])
-    # test_collector = Collector(policy, envs)
-    # policy.eval()
-    # test_collector.collect(n_episode=10)
+
+### This is a full RL strategy ###
+
+
+class QlibOrderDataset(Dataset):
+    def __init__(self, order_file):
+        with open(order_file, 'rb') as f:
+            self.orders = pickle.load(f)
+
+    def __len__(self):
+        return len(self.orders)
+
+    def __getitem__(self, index):
+        return self.orders[index]
+
+
+class DummyPolicy(BasePolicy):
+    def forward(self, batch, state=None, **kwargs):
+        return Batch(act=np.random.randint(0, 5, size=(len(batch), )))
+
+    def learn(self, *args, **kwargs):
+        pass
+
+
+class Observation:
+    def __init__(self, time_per_step):
+        self.time_per_step = time_per_step
+
+    def __call__(self, ep_state: EpisodicState) -> Any:
+        obs = self.observe(ep_state)
+        if not self.validate(obs):
+            raise ValueError(f'Observation space does not contain obs. Space: {self.observation_space} Sample: {obs}')
+        return obs
+
+    def validate(self, obs: Any) -> bool:
+        return self.observation_space.contains(obs)
+
+    @property
+    def observation_space(self):
+        space = {
+            'direction': spaces.Discrete(2),
+            'cur_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32),
+            'num_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32),
+            'target': spaces.Box(-1e-5, np.inf, shape=()),
+            'position': spaces.Box(-1e-5, np.inf, shape=()),
+            'features': spaces.Box(-np.inf, np.inf, shape=(5, ))
+        }
+        return spaces.Dict(space)
+
+    def observe(self, ep_state: EpisodicState) -> Any:
+        return {
+            'direction': _to_int32(ep_state.direction),
+            'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)),
+            'num_step': _to_int32(ep_state.num_step),
+            'target': _to_float32(ep_state.target),
+            'position': _to_float32(ep_state.position),
+            'features': D.features(
+                [ep_state.stock_id],
+                ['$open', '$close', '$high', '$low', '$volume'],
+                start_time=ep_state.start_time,
+                end_time=ep_state.end_time,
+                freq=self.time_per_step
+            ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy(),
+        }
+
+
+class Action:
+    denominator = 4
+
+    @property
+    def action_space(self):
+        return spaces.Discrete(self.denominator + 1)
+
+    def __call__(self, action: Any, ep_state: EpisodicState) -> Any:
+        if not self.validate(action):
+            raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}')
+        act_ = self.to_volume(action, ep_state)
+        return act_
+
+    def validate(self, action: Any) -> bool:
+        return self.action_space.contains(action)
+
+    def to_volume(self, action: Any, ep_state: EpisodicState):
+        exec_vol = ep_state.position / self.denominator * action
+        if ep_state.cur_step + 1 >= ep_state.num_step:
+            exec_vol = ep_state.position
+        # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit?
+        return exec_vol
+
+
+class Reward:
+    weight = 1.0
+
+    def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
+        rew, info = 0., {}
+        if ep_state.done:
+            ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state))
+            rew += ep_rew
+            info.update({f'ep/{k}': v for k, v in ep_info.items()})
+        st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state))
+        rew += st_rew
+        info.update({f'st/{k}': v for k, v in st_info.items()})
+        return rew * self.weight, info
+
+    @staticmethod
+    def _to_tuple(x):
+        if isinstance(x, tuple):
+            return x
+        return x, {}
+
+    def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]:
+        return 0.
+
+    def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
+        assert ep_state.target > 0
+        baseline_price = st_state.pa_twap
+        pa = baseline_price * st_state.exec_vol.sum() / ep_state.target
+        penalty = -100 * ((st_state.exec_vol / ep_state.target) ** 2).sum()  # penalize too much volume at one step
+        reward = pa + penalty
+        return reward, {'pa': pa, 'penalty': penalty}
+
+
+def _to_int32(val): return np.array(int(val), dtype=np.int32)
+def _to_float32(val): return np.array(val, dtype=np.float32)
+
+### End of RL strategy ###
 
 
 if __name__ == '__main__':

From cc8339acd925a2df0027ae64bb0b8a4a360ed504 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 16:49:52 +0800
Subject: [PATCH 07/28] Add a few comments

---
 rl_orders        | Bin 0 -> 3464 bytes
 rl_playground.py |  16 +++++++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 rl_orders

diff --git a/rl_orders b/rl_orders
new file mode 100644
index 0000000000000000000000000000000000000000..7902b901c000bfd82fb7fcc0386c588f3f78cbb4
GIT binary patch
literal 3464
zcmai$eM}Q~7{^;->?j+k)cJzoL`)bn&F}ioCT}CMX-(Nc=HkTe`a-UQzU+G4Bus`Q
zI>BQSJ%TO}L`B0UZjDnjmStp_EHR5j&`k+(i2|Dw#>|9GmWBPU9rq*4-LLmYFYP_~
z-1m8&@9%k97u&MuNk#Z7=QFwFx2oKBjh%8-vaSHD@i9&p!*h=nhwn%DXZG@YU=$Hx
zeU3_-+sSi8=}SfcOhCtTag@gw^s+p+%p3Iht2GWE<I;G7Rjiw07^)fu1RPWq#qdWU
zF&qN#PPW?b5`I}!VdfhSe889RnU)w%xskV8ty|{c75=a&Sl(Vpv=+TQSY7`hpWhGT
zxCEz*s<Qp}R*QmD1z3t<ZLHhl;<+?G<#kd4jSVIg(6E8X4L*WF9)E!6;<wFzfv;~Q
z6pEE!zVV<{ZM6y-IKt(1Mo4rDd}utg+rRvy`G@gNH{)`!Zl9O8D7m-_%E9^=o=%_>
zVH$dr2E9}wiP)^8GkM_zf_6G4Qc*e%7IFswTD`%@(*z_fDI|2)$?me_$pzt9dbveG
zub@-uH2P_Jtwl+v!=Uwrv709+a%EvAg9!UZb-%Y|U<n@f>HDQ}%7Dpe7Gb78?v?ND
zOvJ;c2G2?=6Z~Qzvqjjb>Wh7UcMV5+Dwk$~(<%vrH3Pt?6Je)8NAJDdB)qbph&f3Z
zR7L`Tl>J^@(iBX}!o$vOJcNOvZZ?uRz2~V7cv$<eG)%At0H^_%dnZRc7oNbwI(B|7
zrwka4K&;uN8m1`p`4qkZwTB1ftf4gk1iMijden6?7q4uf_GFHXX@OCTy7}jwQ*EQ2
zN<3`Cu2quC1fKyNs##An&HUW47!UiZElILwLIZR_FV^gB@31@WY&ssclX+e?&1{oN
zglU?_Girub;b9pnpQJLuv_>N`ZODF0d10vz53|RWOTvWXWLR*}S6m-_MO`qBw?_Vz
zOvwfaHPdTR&GPqMiNE!58D3eX@~EUTAx)hQS%Yc(Zt9o#=kT!BHPTZ<u!c4pHQ>$G
zyve?R77v@K@XIPQk;rE=LqiGwG)HMbd#Oea21ql4)sN1mzA(KGudM4wP7)?GK&vN3
zpB-%P?w@Rk#lub{zbe^Hp=M?if;GDQ>ANoK@vzksJ+jK+zXnm+{qJwi7HzqS_u2N2
z?U8{0?M*?M&Wx<__>JOEE~Uiam2G|PUCA_e=-?4BqrI6j=WQ+x#p7X*KD#TKrl5?>
zMmylzdy8|`Em7uK_o75;Wx{bHkuvqQnz8(kpTaA9N7X5>%z(zWZ+e<IkQGH){?wE#
z%%Dfzyn13yUmo!-zMFMd(<0I(x(Q5ZYK#>mj5i%p<J0`>_1m(_;NTX0w)?lfma&mM
zJgkmgFAFm**a5@A(LP&wl!fx_;d;4l0>gssba_1WTw7=aU$fym6_POMGq?lL)OfK!
zcs@f#;?qp&|4=qfok=U!?D*>1*`lPCcvwS^^mYpGO%nNxFPkXaoCNT&+GB*IvhX|u
zNLk6u!L7u)O?cSd99D7(3pInIR!q~dZQOS8Oq80b_n(qe28RZM{b0Er^8OlS1gRdL
Jo`<<K{{rMQg4X~5

literal 0
HcmV?d00001

diff --git a/rl_playground.py b/rl_playground.py
index 482615215..fa2022dcb 100644
--- a/rl_playground.py
+++ b/rl_playground.py
@@ -16,7 +16,7 @@ from qlib.tests.data import GetData
 from qlib.utils import init_instance_by_config, exists_qlib_data
 from torch.utils.data import Dataset, DataLoader
 from tianshou.data import Batch, Collector
-from tianshou.env import DummyVectorEnv
+from tianshou.env import DummyVectorEnv, SubprocVectorEnv
 from tianshou.policy import BasePolicy
 
 
@@ -51,7 +51,8 @@ def price_advantage(exec_price: float, baseline_price: float, direction: int) ->
 @dataclass
 class EpisodicState:
     """
-    A simplified data structure for RL-related components to process observations and rewards
+    A simplified data structure as the input of RL-related components to calculate observations and rewards.
+    Some of the metrics info are calculated on-the-fly in this class.
     """
     # requirements
     stock_id: int
@@ -181,6 +182,7 @@ class SingleOrderEnv(gym.Env):
         return self.observation.observation_space
 
     def retrieve_backtest_data(self, field: str):
+        # Retrieve backtest data for RL-specific use (including reward calculation)
         return D.features(
             [self.cur_order.stock_id],
             ['$open', '$close', '$high', '$low', '$volume'],
@@ -190,6 +192,7 @@ class SingleOrderEnv(gym.Env):
         )[field].to_numpy()
 
     def initialize_state(self):
+        # Synchronous state for executor to EpisodicState
         self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
         state = EpisodicState(
             stock_id=self.cur_order.stock_id,
@@ -207,6 +210,7 @@ class SingleOrderEnv(gym.Env):
         return state
 
     def update_state(self, exec_vol):
+        # Synchronous exec_vol to executor and synchronous back to EpisodicState
         calendar = self.executor.trade_calendar
         state = self.ep_state
 
@@ -273,6 +277,7 @@ class SingleOrderEnv(gym.Env):
                 'ins': self.ep_state.stock_id,
                 'date': self.ep_state.start_time,
             }
+            # TODO: collect logs
             pprint(info)
 
         return self.observation(self.ep_state), reward, self.ep_state.done, info
@@ -327,13 +332,18 @@ def _main():
         )
         return SingleOrderEnv(
             observation, action, reward_fn,
-            iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)), executor)
+            iter(DataLoader(QlibOrderDataset('rl_orders'), batch_size=None, shuffle=True)), executor)
 
     policy = DummyPolicy()
 
+    # This can not be replaced with SubprocVectorEnv
+    # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor
+    # p = Pool(processes=workers)
+    # AssertionError: daemonic processes are not allowed to have children
     envs = DummyVectorEnv([dummy_env for _ in range(4)])
     test_collector = Collector(policy, envs)
     policy.eval()
+    # TODO: create a queue for all orders and make it auto-complete when all the orders are processed
     test_collector.collect(n_episode=10)
 
 

From 231440561324e3592e7eb5ed82fafe8a2a9d55ce Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 16:53:39 +0800
Subject: [PATCH 08/28] Rename files

---
 .../nested_decision_execution/assets/orders         | Bin
 .../nested_decision_execution/rl_dummy.py           |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename rl_orders => examples/nested_decision_execution/assets/orders (100%)
 rename rl_playground.py => examples/nested_decision_execution/rl_dummy.py (99%)

diff --git a/rl_orders b/examples/nested_decision_execution/assets/orders
similarity index 100%
rename from rl_orders
rename to examples/nested_decision_execution/assets/orders
diff --git a/rl_playground.py b/examples/nested_decision_execution/rl_dummy.py
similarity index 99%
rename from rl_playground.py
rename to examples/nested_decision_execution/rl_dummy.py
index fa2022dcb..1ea444cdf 100644
--- a/rl_playground.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -332,7 +332,7 @@ def _main():
         )
         return SingleOrderEnv(
             observation, action, reward_fn,
-            iter(DataLoader(QlibOrderDataset('rl_orders'), batch_size=None, shuffle=True)), executor)
+            iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor)
 
     policy = DummyPolicy()
 

From f5ac6230e13e80b1eee1a33ecb0e590b3e072758 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 22:04:54 +0800
Subject: [PATCH 09/28] Refactor for strategy

---
 .../nested_decision_execution/rl_dummy.py     | 134 ++++++++++--------
 1 file changed, 71 insertions(+), 63 deletions(-)

diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
index 1ea444cdf..3eec91789 100644
--- a/examples/nested_decision_execution/rl_dummy.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -1,7 +1,7 @@
 import pickle
 from dataclasses import dataclass, asdict
 from pprint import pprint
-from typing import Iterable, Any, Optional, Tuple, Dict
+from typing import Iterable, Any, Optional, Tuple, Dict, List
 
 import gym
 import numpy as np
@@ -128,6 +128,48 @@ class EpisodicState:
         }
         return logs
 
+    @classmethod
+    def from_order_and_executor(cls, order: Order, executor: BaseExecutor, frequency: str) -> "EpisodicState":
+        # Synchronous state for executor to EpisodicState
+        executor.reset(start_time=order.start_time, end_time=order.end_time)
+        state = cls(
+            stock_id=order.stock_id,
+            start_time=order.start_time,
+            end_time=order.end_time,
+            direction=order.direction,
+            target=order.amount,
+            num_step=executor.trade_calendar.get_trade_len(),
+            market_price=_retrieve_backtest_data(order, '$close', frequency),
+            market_vol=_retrieve_backtest_data(order, '$volume', frequency),
+        )
+        state.cur_step = executor.trade_calendar.get_trade_step()
+        assert state.cur_step == 0
+        state.cur_time, _ = executor.trade_calendar.get_step_time(state.cur_step)
+        return state
+
+    def update(self, execute_result: List[Order], executor: BaseExecutor) -> "StepState":
+        exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
+        # Synchronous exec_vol to executor and synchronous back to EpisodicState
+        calendar = executor.trade_calendar
+        cur_tick = self.cur_tick
+        ticks_this_step = len(exec_vol)
+        self.cur_step = trade_step = calendar.get_trade_step()
+        self.cur_tick += ticks_this_step
+        self.position -= np.sum(exec_vol)
+        self.position_history[trade_step] = self.position
+        self.done = executor.finished()
+        self.exec_vol = exec_vol if self.exec_vol is None else \
+            np.concatenate((self.exec_vol, exec_vol))
+
+        if self.done:
+            self.update_stats()
+        else:
+            self.cur_time, _ = calendar.get_step_time(trade_step)
+
+        l, r = cur_tick, cur_tick + ticks_this_step
+        assert 0 <= l < r
+        return StepState(exec_vol, self.market_vol[l:r], self.market_price[l:r], self)
+
 
 @dataclass
 class StepState:
@@ -158,6 +200,28 @@ class StepState:
                                        self.episode_state.direction)
 
 
+def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndarray:
+    # Retrieve backtest data for RL-specific use (including reward calculation)
+    return D.features(
+        [order.stock_id],
+        ['$open', '$close', '$high', '$low', '$volume'],
+        start_time=order.start_time,
+        end_time=order.end_time,
+        freq=frequency
+    )[field].to_numpy()
+
+
+def create_sub_order(exec_vol: float, executor: BaseExecutor, original_order: Order) -> Order:
+    # Convert a real number to an order
+    calendar = executor.trade_calendar
+    trade_step = calendar.get_trade_step()
+    trade_start_time, trade_end_time = calendar.get_step_time(trade_step)
+    order_kwargs = asdict(original_order)
+    order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
+    trade_decision = Order(**order_kwargs)
+    return trade_decision
+
+
 class SingleOrderEnv(gym.Env):
     def __init__(self,
                  observation: StateInterpreter,
@@ -181,66 +245,6 @@ class SingleOrderEnv(gym.Env):
     def observation_space(self):
         return self.observation.observation_space
 
-    def retrieve_backtest_data(self, field: str):
-        # Retrieve backtest data for RL-specific use (including reward calculation)
-        return D.features(
-            [self.cur_order.stock_id],
-            ['$open', '$close', '$high', '$low', '$volume'],
-            start_time=self.cur_order.start_time,
-            end_time=self.cur_order.end_time,
-            freq=self.inner_frequency
-        )[field].to_numpy()
-
-    def initialize_state(self):
-        # Synchronous state for executor to EpisodicState
-        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
-        state = EpisodicState(
-            stock_id=self.cur_order.stock_id,
-            start_time=self.cur_order.start_time,
-            end_time=self.cur_order.end_time,
-            direction=self.cur_order.direction,
-            target=self.cur_order.amount,
-            num_step=self.executor.trade_calendar.get_trade_len(),
-            market_price=self.retrieve_backtest_data('$close'),
-            market_vol=self.retrieve_backtest_data('$volume'),
-        )
-        state.cur_step = self.executor.trade_calendar.get_trade_step()
-        assert state.cur_step == 0
-        state.cur_time, _ = self.executor.trade_calendar.get_step_time(state.cur_step)
-        return state
-
-    def update_state(self, exec_vol):
-        # Synchronous exec_vol to executor and synchronous back to EpisodicState
-        calendar = self.executor.trade_calendar
-        state = self.ep_state
-
-        trade_step = calendar.get_trade_step()
-        trade_start_time, trade_end_time = calendar.get_step_time(trade_step)
-        order_kwargs = asdict(self.cur_order)
-        order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
-        trade_decision = Order(**order_kwargs)
-        execute_result = self.executor.execute([trade_decision])
-        cur_tick = state.cur_tick
-
-        inner_exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
-        ticks_this_step = len(inner_exec_vol)
-        state.cur_step = trade_step = calendar.get_trade_step()
-        state.cur_tick += ticks_this_step
-        state.position -= np.sum(inner_exec_vol)
-        state.position_history[trade_step] = state.position
-        state.done = self.executor.finished()
-        state.exec_vol = inner_exec_vol if state.exec_vol is None else \
-            np.concatenate((state.exec_vol, inner_exec_vol))
-
-        if state.done:
-            state.update_stats()
-        else:
-            state.cur_time, _ = calendar.get_step_time(trade_step)
-
-        l, r = cur_tick, cur_tick + ticks_this_step
-        assert 0 <= l < r
-        return StepState(inner_exec_vol, state.market_vol[l:r], state.market_price[l:r], state)
-
     def reset(self):
         try:
             self.cur_order = next(self.dataloader)
@@ -249,7 +253,9 @@ class SingleOrderEnv(gym.Env):
             return None
 
         self.execute_result = []
-        self.ep_state = self.initialize_state()
+        self.ep_state = EpisodicState.from_order_and_executor(
+            self.cur_order, self.executor, self.inner_frequency
+        )
 
         self.action_history = np.full(self.ep_state.num_step, np.nan)
         return self.observation(self.ep_state)
@@ -260,7 +266,9 @@ class SingleOrderEnv(gym.Env):
         self.action_history[self.ep_state.cur_step] = action
 
         exec_vol = self.action(action, self.ep_state)
-        step_state = self.update_state(exec_vol)
+        trade_decision = create_sub_order(exec_vol, self.executor, self.cur_order)
+        execute_result = self.executor.execute([trade_decision])
+        step_state = self.ep_state.update(execute_result, self.executor)
         if self.executor.finished():
             assert self.ep_state.done
 

From bf02fc23f8a63e901ba969b546bc45366f6038d7 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Wed, 2 Jun 2021 23:20:27 +0800
Subject: [PATCH 10/28] Add RL strategy demo

---
 .../nested_decision_execution/rl_dummy.py     | 78 +++++++++++++++----
 qlib/backtest/__init__.py                     |  1 +
 2 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
index 3eec91789..61f1bba59 100644
--- a/examples/nested_decision_execution/rl_dummy.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -1,17 +1,19 @@
 import pickle
+from collections import OrderedDict, defaultdict
 from dataclasses import dataclass, asdict
 from pprint import pprint
-from typing import Iterable, Any, Optional, Tuple, Dict, List
+from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List
 
 import gym
 import numpy as np
 import pandas as pd
 import qlib
 from gym import spaces
-from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order
+from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager
 from qlib.config import REG_CN
 from qlib.data import D
 from qlib.rl.interpreter import StateInterpreter, ActionInterpreter
+from qlib.strategy import BaseStrategy
 from qlib.tests.data import GetData
 from qlib.utils import init_instance_by_config, exists_qlib_data
 from torch.utils.data import Dataset, DataLoader
@@ -129,35 +131,36 @@ class EpisodicState:
         return logs
 
     @classmethod
-    def from_order_and_executor(cls, order: Order, executor: BaseExecutor, frequency: str) -> "EpisodicState":
+    def from_order_and_executor(cls, order: Order, calendar: TradeCalendarManager, frequency: str) -> "EpisodicState":
         # Synchronous state for executor to EpisodicState
-        executor.reset(start_time=order.start_time, end_time=order.end_time)
         state = cls(
             stock_id=order.stock_id,
             start_time=order.start_time,
             end_time=order.end_time,
             direction=order.direction,
             target=order.amount,
-            num_step=executor.trade_calendar.get_trade_len(),
+            num_step=calendar.get_trade_len(),
             market_price=_retrieve_backtest_data(order, '$close', frequency),
             market_vol=_retrieve_backtest_data(order, '$volume', frequency),
         )
-        state.cur_step = executor.trade_calendar.get_trade_step()
+        state.cur_step = calendar.get_trade_step()
         assert state.cur_step == 0
-        state.cur_time, _ = executor.trade_calendar.get_step_time(state.cur_step)
+        state.cur_time, _ = calendar.get_step_time(state.cur_step)
         return state
 
-    def update(self, execute_result: List[Order], executor: BaseExecutor) -> "StepState":
+    def update(self, execute_result: List[Order], calendar: TradeCalendarManager, done: Optional[bool] = None) -> "StepState":
         exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
         # Synchronous exec_vol to executor and synchronous back to EpisodicState
-        calendar = executor.trade_calendar
         cur_tick = self.cur_tick
         ticks_this_step = len(exec_vol)
         self.cur_step = trade_step = calendar.get_trade_step()
         self.cur_tick += ticks_this_step
         self.position -= np.sum(exec_vol)
         self.position_history[trade_step] = self.position
-        self.done = executor.finished()
+        if done is not None:
+            self.done = done
+        else:
+            self.done = self.position < 1e-5
         self.exec_vol = exec_vol if self.exec_vol is None else \
             np.concatenate((self.exec_vol, exec_vol))
 
@@ -211,9 +214,8 @@ def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndar
     )[field].to_numpy()
 
 
-def create_sub_order(exec_vol: float, executor: BaseExecutor, original_order: Order) -> Order:
+def create_sub_order(exec_vol: float, calendar: TradeCalendarManager, original_order: Order) -> Order:
     # Convert a real number to an order
-    calendar = executor.trade_calendar
     trade_step = calendar.get_trade_step()
     trade_start_time, trade_end_time = calendar.get_step_time(trade_step)
     order_kwargs = asdict(original_order)
@@ -253,8 +255,9 @@ class SingleOrderEnv(gym.Env):
             return None
 
         self.execute_result = []
+        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
         self.ep_state = EpisodicState.from_order_and_executor(
-            self.cur_order, self.executor, self.inner_frequency
+            self.cur_order, self.executor.trade_calendar, self.inner_frequency
         )
 
         self.action_history = np.full(self.ep_state.num_step, np.nan)
@@ -266,9 +269,9 @@ class SingleOrderEnv(gym.Env):
         self.action_history[self.ep_state.cur_step] = action
 
         exec_vol = self.action(action, self.ep_state)
-        trade_decision = create_sub_order(exec_vol, self.executor, self.cur_order)
+        trade_decision = create_sub_order(exec_vol, self.executor.trade_calendar, self.cur_order)
         execute_result = self.executor.execute([trade_decision])
-        step_state = self.ep_state.update(execute_result, self.executor)
+        step_state = self.ep_state.update(execute_result, self.executor.trade_calendar)
         if self.executor.finished():
             assert self.ep_state.done
 
@@ -291,6 +294,47 @@ class SingleOrderEnv(gym.Env):
         return self.observation(self.ep_state), reward, self.ep_state.done, info
 
 
+class RLStrategy(BaseStrategy):
+    """When inference and do the backtest from end to end, use this strategy."""
+    # TODO This strategy is still for code demo purpose only.
+    # It has not been end-to-end tested.
+
+    def __init__(
+        self,
+        observation: "Observation",
+        action: "Action",
+        policy: BasePolicy,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.observation = observation
+        self.action = action
+        self.policy = policy
+
+    def reset(self, outer_trade_decision: List[Order] = None, **kwargs):
+        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
+        if outer_trade_decision is not None:
+            self.states = OrderedDict()  # explicitly make it ordered
+            for order in outer_trade_decision:
+                # TODO: how to get inner frequency
+                state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day")
+                self.states[order.stock_id, order.direction] = state
+
+    def generate_trade_decision(self, execute_result=None):
+        # apply results from the last step
+        if execute_result is not None:
+            orders = defaultdict(list)
+            for order, _, __, in execute_result:
+                orders[order.stock_id, order.direction].append(order)
+            for (stock_id, direction), state in self.states.items():
+                state.update(orders[stock_id, direction])
+    
+        obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()])
+        act = self.policy(obs_batch)
+        exec_vols = [self.action(a) for a in act.act]
+        return [create_sub_order(v, self.trade_calendar, order) for v in exec_vols]
+
+
 def _init_qlib():
     provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
     if not exists_qlib_data(provider_uri):
@@ -299,7 +343,7 @@ def _init_qlib():
     qlib.init(provider_uri=provider_uri, region=REG_CN)
 
 
-def _main():
+def _main_tianshou():
     _init_qlib()
 
     # TODO: why is there a benchmark?
@@ -483,4 +527,4 @@ def _to_float32(val): return np.array(val, dtype=np.float32)
 
 
 if __name__ == '__main__':
-    _main()
+    _main_tianshou()
diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index f80f7ebeb..c053269ef 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -7,6 +7,7 @@ from .executor import BaseExecutor
 from .backtest import backtest as backtest_func
 from .backtest import collect_data as data_generator
 from .order import Order
+from .utils import TradeCalendarManager
 
 from .utils import CommonInfrastructure
 from .order import Order

From c43805eff60475eddc5f3f17ce39936cc81de335 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Fri, 4 Jun 2021 12:20:27 +0800
Subject: [PATCH 11/28] Update end-to-end example and requirements

---
 .../requirements.txt                          |   2 +
 .../nested_decision_execution/rl_dummy.py     | 175 +++++++++++-------
 2 files changed, 113 insertions(+), 64 deletions(-)
 create mode 100644 examples/nested_decision_execution/requirements.txt

diff --git a/examples/nested_decision_execution/requirements.txt b/examples/nested_decision_execution/requirements.txt
new file mode 100644
index 000000000..2ad0a826f
--- /dev/null
+++ b/examples/nested_decision_execution/requirements.txt
@@ -0,0 +1,2 @@
+tianshou>=0.4.1
+torch>=1.8.0
diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
index 61f1bba59..4a8f50ad0 100644
--- a/examples/nested_decision_execution/rl_dummy.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -4,12 +4,14 @@ from dataclasses import dataclass, asdict
 from pprint import pprint
 from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List
 
+import fire
 import gym
 import numpy as np
 import pandas as pd
 import qlib
 from gym import spaces
-from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager
+from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager, backtest_func
+from qlib.backtest.executor import NestedExecutor, SimulatorExecutor
 from qlib.config import REG_CN
 from qlib.data import D
 from qlib.rl.interpreter import StateInterpreter, ActionInterpreter
@@ -21,6 +23,8 @@ from tianshou.data import Batch, Collector
 from tianshou.env import DummyVectorEnv, SubprocVectorEnv
 from tianshou.policy import BasePolicy
 
+from workflow import NestedDecisonExecutionWorkflow
+
 
 MAX_STEPS = 10
 
@@ -324,79 +328,122 @@ class RLStrategy(BaseStrategy):
         # apply results from the last step
         if execute_result is not None:
             orders = defaultdict(list)
-            for order, _, __, in execute_result:
-                orders[order.stock_id, order.direction].append(order)
+            for e in execute_result:
+                orders[e[0].stock_id, e[0].direction].append(e)
             for (stock_id, direction), state in self.states.items():
-                state.update(orders[stock_id, direction])
-    
+                state.update(orders[stock_id, direction], self.trade_calendar)
+
+        if not self.states:
+            return []
+
         obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()])
         act = self.policy(obs_batch)
-        exec_vols = [self.action(a) for a in act.act]
-        return [create_sub_order(v, self.trade_calendar, order) for v in exec_vols]
+        exec_vols = [self.action(a, s) for a, s in zip(act.act, self.states.values())]
+        return [create_sub_order(v, self.trade_calendar, o) for v, o in zip(exec_vols, self.outer_trade_decision)]
 
 
-def _init_qlib():
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    if not exists_qlib_data(provider_uri):
-        print(f"Qlib data is not found in {provider_uri}")
-        GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
-    qlib.init(provider_uri=provider_uri, region=REG_CN)
+class RlWorkflow(NestedDecisonExecutionWorkflow):
 
+    def tianshou(self):
+        self._init_qlib()
 
-def _main_tianshou():
-    _init_qlib()
-
-    # TODO: why is there a benchmark?
-    trade_start_time = "2017-01-01"
-    trade_end_time = "2020-08-01"
-    benchmark = "SH000300"
-    time_per_step = "day"
-    executor_config = {
-        "class": "SimulatorExecutor",
-        "module_path": "qlib.backtest.executor",
-        "kwargs": {
-            "time_per_step": time_per_step,
-            "verbose": True,
-            "generate_report": False,
+        # TODO: why is there a benchmark?
+        trade_start_time = "2017-01-01"
+        trade_end_time = "2020-08-01"
+        benchmark = "SH000300"
+        time_per_step = "day"
+        executor_config = {
+            "class": "SimulatorExecutor",
+            "module_path": "qlib.backtest.executor",
+            "kwargs": {
+                "time_per_step": time_per_step,
+                "verbose": True,
+                "generate_report": False,
+            }
         }
-    }
-    exchange = get_exchange(
-        freq="day",
-        limit_threshold=0.095,
-        deal_price="close",
-        open_cost=0.0005,
-        close_cost=0.0015,
-        min_cost=5
-    )
-
-    observation = Observation(time_per_step)
-    action = Action()
-    reward_fn = Reward()
-
-    def dummy_env():
-        executor = get_executor(
-            trade_start_time,
-            trade_end_time,
-            executor_config,
-            exchange,
-            benchmark,
-            1000000000,
+        exchange = get_exchange(
+            freq="day",
+            limit_threshold=0.095,
+            deal_price="close",
+            open_cost=0.0005,
+            close_cost=0.0015,
+            min_cost=5
         )
-        return SingleOrderEnv(
-            observation, action, reward_fn,
-            iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor)
 
-    policy = DummyPolicy()
+        observation = Observation(time_per_step)
+        action = Action()
+        reward_fn = Reward()
 
-    # This can not be replaced with SubprocVectorEnv
-    # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor
-    # p = Pool(processes=workers)
-    # AssertionError: daemonic processes are not allowed to have children
-    envs = DummyVectorEnv([dummy_env for _ in range(4)])
-    test_collector = Collector(policy, envs)
-    policy.eval()
-    # TODO: create a queue for all orders and make it auto-complete when all the orders are processed
-    test_collector.collect(n_episode=10)
+        def dummy_env():
+            executor = get_executor(
+                trade_start_time,
+                trade_end_time,
+                executor_config,
+                exchange,
+                benchmark,
+                1000000000,
+            )
+            return SingleOrderEnv(
+                observation, action, reward_fn,
+                iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor)
+
+        policy = DummyPolicy()
+
+        # This can not be replaced with SubprocVectorEnv
+        # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor
+        # p = Pool(processes=workers)
+        # AssertionError: daemonic processes are not allowed to have children
+        envs = DummyVectorEnv([dummy_env for _ in range(4)])
+        test_collector = Collector(policy, envs)
+        policy.eval()
+        # TODO: create a queue for all orders and make it auto-complete when all the orders are processed
+        test_collector.collect(n_episode=10)
+
+    def rl_day(self, load_model: Optional[str] = None):
+        self._init_qlib()
+        model = init_instance_by_config(self.task["model"])
+        dataset = init_instance_by_config(self.task["dataset"])
+        if load_model is None:
+            self._train_model(model, dataset)
+        else:
+            model = self._load_model(load_model)
+        trade_start_time = "2017-01-01"
+        trade_end_time = "2020-08-01"
+        trade_account = Account(
+            init_cash=int(1e9),
+            benchmark_config={
+                "benchmark": "SH000300",
+                "start_time": trade_start_time,
+                "end_time": trade_end_time,
+            },
+        )
+        exchange = get_exchange(
+            freq="day",
+            limit_threshold=0.095,
+            deal_price="close",
+            open_cost=0.0005,
+            close_cost=0.0015,
+            min_cost=5
+        )
+        common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
+        strategy = init_instance_by_config({
+            "class": "TopkDropoutStrategy",
+            "module_path": "qlib.contrib.strategy.model_strategy",
+            "kwargs": {
+                "model": model,
+                "dataset": dataset,
+                "topk": 50,
+                "n_drop": 5,
+            },
+        }, common_infra=common_infra)
+        executor = NestedExecutor(
+            time_per_step="week",
+            inner_executor=SimulatorExecutor(time_per_step="day", verbose=True),
+            inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()),
+            common_infra=common_infra
+        )
+        report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor)
+        print(report_dict)
 
 
 ### This is a full RL strategy ###
@@ -527,4 +574,4 @@ def _to_float32(val): return np.array(val, dtype=np.float32)
 
 
 if __name__ == '__main__':
-    _main_tianshou()
+    fire.Fire(RlWorkflow)

From 1581ef12accdb32f41a5272c189105184992abd6 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Fri, 4 Jun 2021 13:01:49 +0800
Subject: [PATCH 12/28] Update impl for robustness

---
 .../nested_decision_execution/rl_dummy.py     | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
index 4a8f50ad0..cd0961f66 100644
--- a/examples/nested_decision_execution/rl_dummy.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -152,8 +152,13 @@ class EpisodicState:
         state.cur_time, _ = calendar.get_step_time(state.cur_step)
         return state
 
-    def update(self, execute_result: List[Order], calendar: TradeCalendarManager, done: Optional[bool] = None) -> "StepState":
-        exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
+    def update(self, execute_result: List[Order], calendar: TradeCalendarManager,
+               done: Optional[bool] = None, length: Optional[int] = None) -> "StepState":
+        if length is not None:
+            exec_vol = np.zeros(length)
+            exec_vol[:len(execute_result)] = np.array([order.deal_amount for order, _, __, ___ in execute_result])
+        else:
+            exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
         # Synchronous exec_vol to executor and synchronous back to EpisodicState
         cur_tick = self.cur_tick
         ticks_this_step = len(exec_vol)
@@ -300,8 +305,6 @@ class SingleOrderEnv(gym.Env):
 
 class RLStrategy(BaseStrategy):
     """When inference and do the backtest from end to end, use this strategy."""
-    # TODO This strategy is still for code demo purpose only.
-    # It has not been end-to-end tested.
 
     def __init__(
         self,
@@ -315,12 +318,15 @@ class RLStrategy(BaseStrategy):
         self.action = action
         self.policy = policy
 
+        # TODO: how to get inner frequency and trade len
+        self.inner_frequency = "day"
+        self.inner_trade_len = 1
+
     def reset(self, outer_trade_decision: List[Order] = None, **kwargs):
         super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
         if outer_trade_decision is not None:
             self.states = OrderedDict()  # explicitly make it ordered
             for order in outer_trade_decision:
-                # TODO: how to get inner frequency
                 state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day")
                 self.states[order.stock_id, order.direction] = state
 
@@ -331,7 +337,7 @@ class RLStrategy(BaseStrategy):
             for e in execute_result:
                 orders[e[0].stock_id, e[0].direction].append(e)
             for (stock_id, direction), state in self.states.items():
-                state.update(orders[stock_id, direction], self.trade_calendar)
+                state.update(orders[stock_id, direction], self.trade_calendar, length=self.inner_trade_len)
 
         if not self.states:
             return []
@@ -495,19 +501,21 @@ class Observation:
         return spaces.Dict(space)
 
     def observe(self, ep_state: EpisodicState) -> Any:
+        features = D.features(
+            [ep_state.stock_id],
+            ['$open', '$close', '$high', '$low', '$volume'],
+            start_time=ep_state.start_time,
+            end_time=ep_state.end_time,
+            freq=self.time_per_step
+        ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy()
+        features = np.nan_to_num(features)
         return {
             'direction': _to_int32(ep_state.direction),
             'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)),
             'num_step': _to_int32(ep_state.num_step),
             'target': _to_float32(ep_state.target),
             'position': _to_float32(ep_state.position),
-            'features': D.features(
-                [ep_state.stock_id],
-                ['$open', '$close', '$high', '$low', '$volume'],
-                start_time=ep_state.start_time,
-                end_time=ep_state.end_time,
-                freq=self.time_per_step
-            ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy(),
+            'features': features,
         }
 
 

From 76be5d50e50904d1eb712ca91c57d76dcf3d9b1d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Mon, 7 Jun 2021 10:56:12 +0800
Subject: [PATCH 13/28] Refine example

---
 examples/nested_decision_execution/rl_dummy.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
index cd0961f66..c42e28be4 100644
--- a/examples/nested_decision_execution/rl_dummy.py
+++ b/examples/nested_decision_execution/rl_dummy.py
@@ -319,6 +319,7 @@ class RLStrategy(BaseStrategy):
         self.policy = policy
 
         # TODO: how to get inner frequency and trade len
+        # This should be no longer required when PA is provided by qlib.
         self.inner_frequency = "day"
         self.inner_trade_len = 1
 
@@ -432,6 +433,12 @@ class RlWorkflow(NestedDecisonExecutionWorkflow):
             min_cost=5
         )
         common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
+        executor = NestedExecutor(
+            time_per_step="week",
+            inner_executor=SimulatorExecutor(time_per_step="day", verbose=True),
+            inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()),
+            common_infra=common_infra
+        )
         strategy = init_instance_by_config({
             "class": "TopkDropoutStrategy",
             "module_path": "qlib.contrib.strategy.model_strategy",
@@ -442,12 +449,6 @@ class RlWorkflow(NestedDecisonExecutionWorkflow):
                 "n_drop": 5,
             },
         }, common_infra=common_infra)
-        executor = NestedExecutor(
-            time_per_step="week",
-            inner_executor=SimulatorExecutor(time_per_step="day", verbose=True),
-            inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()),
-            common_infra=common_infra
-        )
         report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor)
         print(report_dict)
 
@@ -463,7 +464,7 @@ class QlibOrderDataset(Dataset):
     def __len__(self):
         return len(self.orders)
 
-    def __getitem__(self, index):
+    def __getitem__(self, index) -> Order:
         return self.orders[index]
 
 
@@ -535,7 +536,7 @@ class Action:
     def validate(self, action: Any) -> bool:
         return self.action_space.contains(action)
 
-    def to_volume(self, action: Any, ep_state: EpisodicState):
+    def to_volume(self, action: Any, ep_state: EpisodicState) -> Any:
         exec_vol = ep_state.position / self.denominator * action
         if ep_state.cur_step + 1 >= ep_state.num_step:
             exec_vol = ep_state.position

From 7525854beda2c0c0303b265c97b52c994561221c Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 22 Jun 2021 03:47:39 +0000
Subject: [PATCH 14/28] Add shortcut in init

---
 qlib/backtest/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index 91eedd736..edfc907cd 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -8,7 +8,7 @@ from .backtest import backtest_loop
 from .backtest import collect_data_loop
 
 from .order import Order
-from .utils import CommonInfrastructure
+from .utils import CommonInfrastructure, TradeCalendarManager
 from ..strategy.base import BaseStrategy
 from ..utils import init_instance_by_config
 from ..log import get_module_logger

From 583fbbef3ce714bdc4b3130b74620f79873119bb Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 22 Jun 2021 07:07:19 +0000
Subject: [PATCH 15/28] Resolve init conflict

---
 qlib/backtest/__init__.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index 107f97782..ae07cdbdf 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -7,15 +7,9 @@ from .exchange import Exchange
 from .executor import BaseExecutor
 from .backtest import backtest_loop
 from .backtest import collect_data_loop
-<<<<<<< HEAD
 
 from .order import Order
 from .utils import CommonInfrastructure, TradeCalendarManager
-=======
-from .utils import CommonInfrastructure
-from .order import Order
-
->>>>>>> ab97e8248443789ce1e0f90a9b5596e5fee60566
 from ..strategy.base import BaseStrategy
 from ..utils import init_instance_by_config
 from ..log import get_module_logger

From 2b4a493617d759d28f49768310c43c99daa169f9 Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Thu, 1 Jul 2021 09:41:08 +0000
Subject: [PATCH 16/28] Order patch

---
 qlib/backtest/order.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index b013d8723..32c4121fc 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
 from qlib.backtest.utils import TradeCalendarManager
 import warnings
+import numpy as np
 import pandas as pd
 from dataclasses import dataclass, field
 from typing import ClassVar, Optional, Union, List, Set, Tuple
@@ -47,7 +48,7 @@ class Order:
 
     direction: int
     factor: float
-    deal_amount: float = field(init=False)
+    deal_amount: Optional[float] = None
 
     # FIXME:
     # for compatible now.
@@ -62,11 +63,11 @@ class Order:
         self.deal_amount = 0
 
     @staticmethod
-    def parse_dir(direction: Union[str, int, OrderDir]) -> OrderDir:
+    def parse_dir(direction: Union[str, int, np.integer, OrderDir]) -> OrderDir:
         if isinstance(direction, OrderDir):
             return direction
-        elif isinstance(direction, int):
-            return OrderDir(direction)
+        elif isinstance(direction, (int, float, np.integer, np.floating)):
+            return OrderDir(int(direction))
         elif isinstance(direction, str):
             dl = direction.lower()
             if dl.strip() == "sell":

From 7048bef7c69e3a3e56bbf8ffb34b85eac490c192 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Sun, 4 Jul 2021 06:41:34 +0000
Subject: [PATCH 17/28] fix ffr and order amount

---
 qlib/backtest/account.py  |  7 +++++--
 qlib/backtest/executor.py |  2 ++
 qlib/backtest/order.py    | 31 +++++++++++++++++++++++++++++--
 qlib/backtest/report.py   | 29 +++++++++++++++++++++--------
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index 6167ee407..0d89dde87 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -9,7 +9,7 @@ import pandas as pd
 
 from .position import BasePosition, InfPosition, Position
 from .report import Report, Indicator
-from .order import Order
+from .order import BaseTradeDecision, Order
 from .exchange import Exchange
 
 """
@@ -226,6 +226,7 @@ class Account:
         trade_end_time: pd.Timestamp,
         trade_exchange: Exchange,
         atomic: bool,
+        outer_trade_decision: BaseTradeDecision,
         generate_report: bool = False,
         trade_info: list = None,
         inner_order_indicators: Indicator = None,
@@ -276,7 +277,9 @@ class Account:
         if atomic:
             self.indicator.update_order_indicators(trade_start_time, trade_end_time, trade_info, trade_exchange)
         else:
-            self.indicator.agg_order_indicators(inner_order_indicators, indicator_config)
+            self.indicator.agg_order_indicators(
+                inner_order_indicators, indicator_config=indicator_config, outer_trade_decision=outer_trade_decision
+            )
 
         self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config)
         self.indicator.record(trade_start_time)
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 3f7b2f4ed..7341e5225 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -299,6 +299,7 @@ class NestedExecutor(BaseExecutor):
                 trade_end_time,
                 self.trade_exchange,
                 atomic=False,
+                outer_trade_decision=trade_decision,
                 generate_report=self.generate_report,
                 inner_order_indicators=inner_order_indicators,
                 indicator_config=self.indicator_config,
@@ -409,6 +410,7 @@ class SimulatorExecutor(BaseExecutor):
             trade_end_time,
             self.trade_exchange,
             atomic=True,
+            outer_trade_decision=trade_decision,
             generate_report=self.generate_report,
             trade_info=execute_result,
             indicator_config=self.indicator_config,
diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index 32c4121fc..64ff2a56f 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -40,7 +40,7 @@ class Order:
     """
 
     stock_id: str
-    amount: float
+    amount: float  # `amount` is a non-negative value
 
     # The interval of the order which belongs to (NOTE: this is not the expected order dealing range time)
     start_time: pd.Timestamp
@@ -48,7 +48,7 @@ class Order:
 
     direction: int
     factor: float
-    deal_amount: Optional[float] = None
+    deal_amount: Optional[float] = None  # `deal_amount` is a non-negative value
 
     # FIXME:
     # for compatible now.
@@ -62,6 +62,33 @@ class Order:
             raise NotImplementedError("direction not supported, `Order.SELL` for sell, `Order.BUY` for buy")
         self.deal_amount = 0
 
+    @property
+    def amount_delta(self) -> float:
+        """
+        return the delta of amount.
+        - Positive value indicates buying `amount` of share
+        - Negative value indicates selling `amount` of share
+        """
+        return self.amount * self.sign
+
+    @property
+    def deal_amount_delta(self) -> float:
+        """
+        return the delta of deal_amount.
+        - Positive value indicates buying `deal_amount` of share
+        - Negative value indicates selling `deal_amount` of share
+        """
+        return self.deal_amount * self.sign
+
+    @property
+    def sign(self) -> float:
+        """
+        return the sign of trading
+        - `+1` indicates buying
+        - `-1` value indicates selling
+        """
+        return self.direction * 2 - 1
+
     @staticmethod
     def parse_dir(direction: Union[str, int, np.integer, OrderDir]) -> OrderDir:
         if isinstance(direction, OrderDir):
diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index f217ea169..4f645c564 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -4,6 +4,8 @@
 
 from collections import OrderedDict
 from logging import warning
+from typing import List
+from qlib.backtest.order import BaseTradeDecision, Order
 import pandas as pd
 import pathlib
 import warnings
@@ -241,13 +243,13 @@ class Indicator:
         trade_cost = dict()
 
         for order, _trade_val, _trade_cost, _trade_price in trade_info:
-            amount[order.stock_id] = order.amount * (order.direction * 2 - 1)
-            deal_amount[order.stock_id] = order.deal_amount * (order.direction * 2 - 1)
+            amount[order.stock_id] = order.amount_delta
+            deal_amount[order.stock_id] = order.deal_amount_delta
             trade_price[order.stock_id] = _trade_price
-            trade_value[order.stock_id] = _trade_val * (order.direction * 2 - 1)
+            trade_value[order.stock_id] = _trade_val * order.sign
             trade_cost[order.stock_id] = _trade_cost
 
-        self.order_indicator["amount"] = pd.Series(amount)
+        self.order_indicator["amount"] = self.order_indicator["inner_amount"] = pd.Series(amount)
         self.order_indicator["deal_amount"] = pd.Series(deal_amount)
         self.order_indicator["trade_price"] = pd.Series(trade_price)
         self.order_indicator["trade_value"] = pd.Series(trade_value)
@@ -271,13 +273,13 @@ class Indicator:
         ) / self.order_indicator["base_price"]
 
     def _agg_order_trade_info(self, inner_order_indicators):
-        amount = pd.Series()
+        inner_amount = pd.Series()
         deal_amount = pd.Series()
         trade_price = pd.Series()
         trade_value = pd.Series()
         trade_cost = pd.Series()
         for _order_indicator in inner_order_indicators:
-            amount = amount.add(_order_indicator["amount"], fill_value=0)
+            inner_amount = inner_amount.add(_order_indicator["inner_amount"], fill_value=0)
             deal_amount = deal_amount.add(_order_indicator["deal_amount"], fill_value=0)
             trade_price = trade_price.add(
                 _order_indicator["trade_price"] * _order_indicator["deal_amount"], fill_value=0
@@ -285,13 +287,21 @@ class Indicator:
             trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0)
             trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0)
 
-        self.order_indicator["amount"] = amount
+        self.order_indicator["inner_amount"] = inner_amount
         self.order_indicator["deal_amount"] = deal_amount
         trade_price /= self.order_indicator["deal_amount"]
         self.order_indicator["trade_price"] = trade_price
         self.order_indicator["trade_value"] = trade_value
         self.order_indicator["trade_cost"] = trade_cost
 
+    def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision):
+        # NOTE: this indicator is designed for order execution, so `amount` is taken from the outer trade decision
+        decision: List[Order] = outer_trade_decision.get_decision()
+        if decision is None:
+            self.order_indicator["amount"] = pd.Series()
+        else:
+            self.order_indicator["amount"] = pd.Series({order.stock_id: order.amount_delta for order in decision})
+
     def _agg_order_fulfill_rate(self):
         self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"]
 
@@ -367,8 +377,11 @@ class Indicator:
         self._update_order_fulfill_rate()
         self._update_order_price_advantage(trade_exchange, trade_start_time, trade_end_time)
 
-    def agg_order_indicators(self, inner_order_indicators, indicator_config={}):
+    def agg_order_indicators(
+        self, inner_order_indicators, outer_trade_decision: BaseTradeDecision, indicator_config={}
+    ):
         self._agg_order_trade_info(inner_order_indicators)
+        self._update_trade_amount(outer_trade_decision)
         self._agg_order_fulfill_rate()
         pa_config = indicator_config.get("pa_config", {})
         self._agg_order_price_advantage(inner_order_indicators, base_price=pa_config.get("base_price", "twap"))

From 82645233e7cf4efcc9cfecfa3bdc3bf67c10b237 Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 6 Jul 2021 03:50:34 +0000
Subject: [PATCH 18/28] Support order dataframe

---
 qlib/contrib/strategy/rule_strategy.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index d18eb2a27..8152b13de 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -714,12 +714,12 @@ class FileOrderStrategy(BaseStrategy):
     - This class provides an interface for user to read orders from csv files.
     """
 
-    def __init__(self, file: Union[IO, str, Path], index_range: Tuple[int, int] = None, *args, **kwargs):
+    def __init__(self, file: Union[IO, str, Path, pd.DataFrame], index_range: Tuple[int, int] = None, *args, **kwargs):
         """
 
         Parameters
         ----------
-        file : Union[IO, str, Path]
+        file : Union[IO, str, Path, pd.DataFrame]
             this parameters will specify the info of expected orders
 
             Here is an example of the content
@@ -741,8 +741,11 @@ class FileOrderStrategy(BaseStrategy):
 
         """
         super().__init__(*args, **kwargs)
-        with get_io_object(file) as f:
-            self.order_df = pd.read_csv(f, dtype={"datetime": np.str})
+        if isinstance(file, pd.DataFrame):
+            self.order_df = file
+        else:
+            with get_io_object(file) as f:
+                self.order_df = pd.read_csv(f, dtype={"datetime": np.str})
 
         self.order_df["datetime"] = self.order_df["datetime"].apply(pd.Timestamp)
         self.order_df = self.order_df.set_index(["datetime", "instrument"])

From 354f7e68c2f9065971887c9c35b278215873ba7a Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 6 Jul 2021 08:47:55 +0000
Subject: [PATCH 19/28] Constrain TWAP trade step

---
 qlib/contrib/strategy/rule_strategy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index 8152b13de..3ca325bf6 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -108,8 +108,8 @@ class TWAPStrategy(BaseStrategy):
         start_idx, end_idx = get_start_end_idx(self, self.outer_trade_decision)
         trade_len = end_idx - start_idx + 1
 
-        if trade_step < start_idx:
-            # It is not time to start trading
+        if trade_step < start_idx or trade_step > end_idx:
+            # It is not time to start trading or trading has ended.
             return TradeDecisionWO(order_list=[], strategy=self)
 
         rel_trade_step = trade_step - start_idx  # trade_step relative to start_idx

From dd8231edebff2dc8108ce28450f507a14263f434 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 6 Jul 2021 11:09:25 +0000
Subject: [PATCH 20/28] simplify the portfolio-based report

---
 qlib/backtest/account.py  | 52 ++++++++++++++++++++++++++++++---------
 qlib/backtest/backtest.py |  8 +++---
 qlib/backtest/executor.py | 44 +++++++++++----------------------
 qlib/backtest/order.py    |  1 -
 qlib/strategy/base.py     |  2 +-
 5 files changed, 61 insertions(+), 46 deletions(-)

diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index 0d89dde87..b394d5823 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -64,34 +64,49 @@ class AccumulatedInfo:
 
 class Account:
     def __init__(
-        self, init_cash: float = 1e9, freq: str = "day", benchmark_config: dict = {}, pos_type: str = "Position"
+        self,
+        init_cash: float = 1e9,
+        freq: str = "day",
+        benchmark_config: dict = {},
+        pos_type: str = "Position",
+        port_metr_enabled: bool = True,
     ):
-        self.pos_type = pos_type
+        self._pos_type = pos_type
+        self._port_metr_enabled = port_metr_enabled
         self.init_vars(init_cash, freq, benchmark_config)
 
+    def is_port_metr_enabled(self):
+        """
+        Whether portfolio-based metrics are enabled (and the current position does not skip updates).
+        """
+        return self._port_metr_enabled and not self.current.skip_update()
+
     def init_vars(self, init_cash, freq: str, benchmark_config: dict):
 
         # init cash
         self.init_cash = init_cash
         self.current: BasePosition = init_instance_by_config(
             {
-                "class": self.pos_type,
+                "class": self._pos_type,
                 "kwargs": {"cash": init_cash},
                 "module_path": "qlib.backtest.position",
             }
         )
         self.accum_info = AccumulatedInfo()
+        self.report = None
+        self.positions = {}
         self.reset(freq=freq, benchmark_config=benchmark_config, init_report=True)
 
     def reset_report(self, freq, benchmark_config):
         # portfolio related metrics
-        self.report = Report(freq, benchmark_config)
-        self.positions = {}
+        if self.is_port_metr_enabled():
+            self.report = Report(freq, benchmark_config)
+            self.positions = {}
 
         # trading related matric(e.g. high-frequency trading)
         self.indicator = Indicator()
 
-    def reset(self, freq=None, benchmark_config=None, init_report=False):
+    def reset(self, freq=None, benchmark_config=None, init_report=False, port_metr_enabled: bool = None):
         """reset freq and report of account
 
         Parameters
@@ -108,6 +123,9 @@ class Account:
         if benchmark_config is not None:
             self.benchmark_config = benchmark_config
 
+        if port_metr_enabled is not None:
+            self._port_metr_enabled = port_metr_enabled
+
         if freq is not None or benchmark_config is not None or init_report:
             self.reset_report(self.freq, self.benchmark_config)
 
@@ -137,7 +155,7 @@ class Account:
             self.accum_info.add_return_value(profit)  # note here do not consider cost
 
     def update_order(self, order, trade_val, cost, trade_price):
-        if self.current.skip_update():
+        if not self.is_port_metr_enabled():
             # TODO: supporting polymorphism for account
             # updating order for infinite position is meaningless
             return
@@ -160,12 +178,14 @@ class Account:
     def update_bar_count(self):
         """at the end of the trading bar, update holding bar, count of stock"""
         # update holding day count
+        # NOTE: updating bar_count serves not only portfolio metrics but also the strategy
         if not self.current.skip_update():
             self.current.add_count_all(bar=self.freq)
 
     def update_current(self, trade_start_time, trade_end_time, trade_exchange):
         """update current to make rtn consistent with earning at the end of bar"""
         # update price for stock in the position and the profit from changed_price
+        # NOTE: updating position serves not only portfolio metrics but also the strategy
         if not self.current.skip_update():
             stock_list = self.current.get_stock_list()
             for code in stock_list:
@@ -227,7 +247,6 @@ class Account:
         trade_exchange: Exchange,
         atomic: bool,
         outer_trade_decision: BaseTradeDecision,
-        generate_report: bool = False,
         trade_info: list = None,
         inner_order_indicators: Indicator = None,
         indicator_config: dict = {},
@@ -246,8 +265,6 @@ class Account:
             whether the trading executor is atomic, which means there is no higher-frequency trading executor inside it
             - if atomic is True, calculate the indicators with trade_info
             - else, aggregate indicators with inner indicators
-        generate_report : bool, optional
-            whether to generate report, by default False
         trade_info : List[(Order, float, float, float)], optional
             trading information, by default None
             - necessary if atomic is True
@@ -267,7 +284,7 @@ class Account:
         # TODO:  `update_bar_count` and  `update_current` should placed in Position and be merged.
         self.update_bar_count()
         self.update_current(trade_start_time, trade_end_time, trade_exchange)
-        if generate_report:
+        if self.is_port_metr_enabled():
             # report is portfolio related analysis
             self.update_report(trade_start_time, trade_end_time)
 
@@ -283,3 +300,16 @@ class Account:
 
         self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config)
         self.indicator.record(trade_start_time)
+
+    def get_report(self):
+        """get the history report and positions instance"""
+        if self.is_port_metr_enabled():
+            _report = self.report.generate_report_dataframe()
+            _positions = self.get_positions()
+            return _report, _positions
+        else:
+            raise ValueError("generate_report should be True if you want to generate report")
+
+    def get_trade_indicator(self) -> Indicator:
+        """get the trade indicator instance, which has pa/pos/ffr info."""
+        return self.indicator
diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py
index 48d06db6c..573c874b0 100644
--- a/qlib/backtest/backtest.py
+++ b/qlib/backtest/backtest.py
@@ -69,13 +69,13 @@ def collect_data_loop(
         all_executors = trade_executor.get_all_executors()
 
         all_reports = {
-            "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.get_report()
+            "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.trade_account.get_report()
             for _executor in all_executors
-            if _executor.generate_report
+            if _executor.trade_account.is_port_metr_enabled()
         }
         all_indicators = {}
         for _executor in all_executors:
             key = "{}{}".format(*Freq.parse(_executor.time_per_step))
-            all_indicators[key] = _executor.get_trade_indicator().generate_trade_indicators_dataframe()
-            all_indicators[key + "_obj"] = _executor.get_trade_indicator()
+            all_indicators[key] = _executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
+            all_indicators[key + "_obj"] = _executor.trade_account.get_trade_indicator()
         return_value.update({"report": all_reports, "indicator": all_indicators})
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 14d97e825..adea9dde0 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -103,8 +103,10 @@ class BaseExecutor:
             self.common_infra.update(common_infra)
 
         if common_infra.has("trade_account"):
+            # NOTE: there is a trick in the code.
+            # copy is used instead of deepcopy. So positions are shared
             self.trade_account = copy.copy(common_infra.get("trade_account"))
-            self.trade_account.reset(freq=self.time_per_step, init_report=True)
+            self.trade_account.reset(freq=self.time_per_step, init_report=True, port_metr_enabled=self.generate_report)
 
     def reset(self, track_data: bool = None, common_infra: CommonInfrastructure = None, **kwargs):
         """
@@ -167,19 +169,6 @@ class BaseExecutor:
             yield trade_decision
         return self.execute(trade_decision)
 
-    def get_report(self):
-        """get the history report and postions instance"""
-        if self.generate_report:
-            _report = self.trade_account.report.generate_report_dataframe()
-            _positions = self.trade_account.get_positions()
-            return _report, _positions
-        else:
-            raise ValueError("generate_report should be True if you want to generate report")
-
-    def get_trade_indicator(self) -> Indicator:
-        """get the trade indicator instance, which has pa/pos/ffr info."""
-        return self.trade_account.indicator
-
     def get_all_executors(self):
         """get all executors"""
         return [self]
@@ -289,21 +278,19 @@ class NestedExecutor(BaseExecutor):
             _inner_execute_result = yield from self.inner_executor.collect_data(trade_decision=_inner_trade_decision)
 
             execute_result.extend(_inner_execute_result)
-            inner_order_indicators.append(self.inner_executor.get_trade_indicator().get_order_indicator())
+            inner_order_indicators.append(self.inner_executor.trade_account.get_trade_indicator().get_order_indicator())
 
-        if hasattr(self, "trade_account"):
-            trade_step = self.trade_calendar.get_trade_step()
-            trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
-            self.trade_account.update_bar_end(
-                trade_start_time,
-                trade_end_time,
-                self.trade_exchange,
-                atomic=False,
-                outer_trade_decision=trade_decision,
-                generate_report=self.generate_report,
-                inner_order_indicators=inner_order_indicators,
-                indicator_config=self.indicator_config,
-            )
+        trade_step = self.trade_calendar.get_trade_step()
+        trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
+        self.trade_account.update_bar_end(
+            trade_start_time,
+            trade_end_time,
+            self.trade_exchange,
+            atomic=False,
+            outer_trade_decision=trade_decision,
+            inner_order_indicators=inner_order_indicators,
+            indicator_config=self.indicator_config,
+        )
 
         self.trade_calendar.step()
         if return_value is not None:
@@ -457,7 +444,6 @@ class SimulatorExecutor(BaseExecutor):
             self.trade_exchange,
             atomic=True,
             outer_trade_decision=trade_decision,
-            generate_report=self.generate_report,
             trade_info=execute_result,
             indicator_config=self.indicator_config,
         )
diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index 64ff2a56f..535309d91 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -56,7 +56,6 @@ class Order:
     SELL: ClassVar[OrderDir] = OrderDir.SELL
     BUY: ClassVar[OrderDir] = OrderDir.BUY
 
-
     def __post_init__(self):
         if self.direction not in {Order.SELL, Order.BUY}:
             raise NotImplementedError("direction not supported, `Order.SELL` for sell, `Order.BUY` for buy")
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index bac59acfb..a787c098f 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -10,7 +10,7 @@ from ..utils import init_instance_by_config
 from ..backtest.utils import CommonInfrastructure, LevelInfrastructure, TradeCalendarManager
 from ..backtest.order import BaseTradeDecision
 
-__all__ = ['BaseStrategy', 'ModelStrategy', 'RLStrategy', 'RLIntStrategy']
+__all__ = ["BaseStrategy", "ModelStrategy", "RLStrategy", "RLIntStrategy"]
 
 
 class BaseStrategy:

From 6fd50a5bfa3a20d153bd6b86ec8305a725bef228 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 6 Jul 2021 12:08:53 +0000
Subject: [PATCH 21/28] Supporting skip empty decisions

---
 qlib/backtest/executor.py | 44 ++++++++++++++++++++++++++-------------
 qlib/backtest/order.py    |  5 ++++-
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index adea9dde0..c4807ebde 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -191,6 +191,7 @@ class NestedExecutor(BaseExecutor):
         generate_report: bool = False,
         verbose: bool = False,
         track_data: bool = False,
+        skip_empty_decision: bool = True,
         trade_exchange: Exchange = None,
         common_infra: CommonInfrastructure = None,
         **kwargs,
@@ -206,6 +207,11 @@ class NestedExecutor(BaseExecutor):
             exchange that provides market info, used to generate report
             - If generate_report is None, trade_exchange will be ignored
             - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra
+        skip_empty_decision: bool
+            Whether the executor skips the inner loop when the decision is empty.
+            It should be False in the following cases
+            - The decisions may be updated by steps
+            - The inner executor may not follow the decisions from the outer strategy
         """
         self.inner_executor = init_instance_by_config(
             inner_executor, common_infra=common_infra, accept_types=BaseExecutor
@@ -214,6 +220,8 @@ class NestedExecutor(BaseExecutor):
             inner_strategy, common_infra=common_infra, accept_types=BaseStrategy
         )
 
+        self._skip_empty_decision = skip_empty_decision
+
         super(NestedExecutor, self).__init__(
             time_per_step=time_per_step,
             start_time=start_time,
@@ -259,26 +267,32 @@ class NestedExecutor(BaseExecutor):
     def collect_data(self, trade_decision: BaseTradeDecision, return_value=None):
         if self.track_data:
             yield trade_decision
-        self._init_sub_trading(trade_decision)
         execute_result = []
         inner_order_indicators = []
-        _inner_execute_result = None
-        while not self.inner_executor.finished():
-            # outter strategy have chance to update decision each iterator
-            updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar)
-            if updated_trade_decision is not None:
-                trade_decision = updated_trade_decision
-                # NEW UPDATE
-                # create a hook for inner strategy to update outter decision
-                self.inner_strategy.alter_outer_trade_decision(trade_decision)
 
-            _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result)
+        if not (trade_decision.empty() and self._skip_empty_decision):
+            _inner_execute_result = None
+            self._init_sub_trading(trade_decision)
+            while not self.inner_executor.finished():
+                # the outer strategy has a chance to update the decision on each iteration
+                updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar)
+                if updated_trade_decision is not None:
+                    trade_decision = updated_trade_decision
+                    # NEW UPDATE
+                    # create a hook for the inner strategy to update the outer decision
+                    self.inner_strategy.alter_outer_trade_decision(trade_decision)
 
-            # NOTE: Trade Calendar will step forward in the follow line
-            _inner_execute_result = yield from self.inner_executor.collect_data(trade_decision=_inner_trade_decision)
+                _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result)
 
-            execute_result.extend(_inner_execute_result)
-            inner_order_indicators.append(self.inner_executor.trade_account.get_trade_indicator().get_order_indicator())
+                # NOTE: Trade Calendar will step forward in the following line
+                _inner_execute_result = yield from self.inner_executor.collect_data(
+                    trade_decision=_inner_trade_decision
+                )
+
+                execute_result.extend(_inner_execute_result)
+                inner_order_indicators.append(
+                    self.inner_executor.trade_account.get_trade_indicator().get_order_indicator()
+                )
 
         trade_step = self.trade_calendar.get_trade_step()
         trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index 535309d91..1953426fd 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -197,7 +197,7 @@ class BaseTradeDecision:
             Example:
                 []:
                     Decision not available
-                concrete_decision:
+                [concrete_decision]:
                     available
         """
         raise NotImplementedError(f"This type of input is not supported")
@@ -236,6 +236,9 @@ class BaseTradeDecision:
         """
         raise NotImplementedError(f"Please implement the `func` method")
 
+    def empty(self) -> bool:
+        return len(self.get_decision()) == 0
+
 
 class TradeDecisionWO(BaseTradeDecision):
     """

From 32ae6e42597bb3f64523d42255c116bcbc1524ab Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Thu, 8 Jul 2021 05:54:36 +0000
Subject: [PATCH 22/28] fix calculating base_price

---
 qlib/backtest/account.py  |  12 ++-
 qlib/backtest/exchange.py |  20 ++---
 qlib/backtest/order.py    |   5 +-
 qlib/backtest/report.py   | 151 +++++++++++++++++++++++++++-----------
 4 files changed, 130 insertions(+), 58 deletions(-)

diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index b394d5823..67f7b056a 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -3,6 +3,7 @@
 
 
 import copy
+from typing import Dict, List
 from qlib.utils import init_instance_by_config
 import warnings
 import pandas as pd
@@ -248,7 +249,7 @@ class Account:
         atomic: bool,
         outer_trade_decision: BaseTradeDecision,
         trade_info: list = None,
-        inner_order_indicators: Indicator = None,
+        inner_order_indicators: List[Dict[str, pd.Series]] = None,
         indicator_config: dict = {},
     ):
         """update account at each trading bar step
@@ -292,10 +293,15 @@ class Account:
         self.indicator.clear()
 
         if atomic:
-            self.indicator.update_order_indicators(trade_start_time, trade_end_time, trade_info, trade_exchange)
+            self.indicator.update_order_indicators(trade_info)
         else:
             self.indicator.agg_order_indicators(
-                inner_order_indicators, indicator_config=indicator_config, outer_trade_decision=outer_trade_decision
+                trade_start_time,
+                trade_end_time,
+                inner_order_indicators,
+                outer_trade_decision=outer_trade_decision,
+                trade_exchange=trade_exchange,
+                indicator_config=indicator_config,
             )
 
         self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config)
diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py
index 26fae378f..3794651dc 100644
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -281,27 +281,27 @@ class Exchange:
 
         return trade_val, trade_cost, trade_price
 
-    def get_quote_info(self, stock_id, start_time, end_time):
-        return resam_ts_data(self.quote[stock_id], start_time, end_time, method=ts_data_last)
+    def get_quote_info(self, stock_id, start_time, end_time, method=ts_data_last):
+        return resam_ts_data(self.quote[stock_id], start_time, end_time, method=method)
 
-    def get_close(self, stock_id, start_time, end_time):
-        return resam_ts_data(self.quote[stock_id]["$close"], start_time, end_time, method=ts_data_last)
+    def get_close(self, stock_id, start_time, end_time, method=ts_data_last):
+        return resam_ts_data(self.quote[stock_id]["$close"], start_time, end_time, method=method)
 
-    def get_volume(self, stock_id, start_time, end_time):
-        return resam_ts_data(self.quote[stock_id]["$volume"], start_time, end_time, method="sum")
+    def get_volume(self, stock_id, start_time, end_time, method="sum"):
+        return resam_ts_data(self.quote[stock_id]["$volume"], start_time, end_time, method=method)
 
-    def get_deal_price(self, stock_id, start_time, end_time, direction: OrderDir):
+    def get_deal_price(self, stock_id, start_time, end_time, direction: OrderDir, method=ts_data_last):
         if direction == OrderDir.SELL:
             pstr = self.sell_price
         elif direction == OrderDir.BUY:
             pstr = self.buy_price
         else:
             raise NotImplementedError(f"This type of input is not supported")
-        deal_price = resam_ts_data(self.quote[stock_id][pstr], start_time, end_time, method=ts_data_last)
-        if np.isclose(deal_price, 0.0) or np.isnan(deal_price):
+        deal_price = resam_ts_data(self.quote[stock_id][pstr], start_time, end_time, method=method)
+        if method is not None and (np.isclose(deal_price, 0.0) or np.isnan(deal_price)):
             self.logger.warning(f"(stock_id:{stock_id}, trade_time:{(start_time, end_time)}, {pstr}): {deal_price}!!!")
             self.logger.warning(f"setting deal_price to close price")
-            deal_price = self.get_close(stock_id, start_time, end_time)
+            deal_price = self.get_close(stock_id, start_time, end_time, method)
         return deal_price
 
     def get_factor(self, stock_id, start_time, end_time) -> Union[float, None]:
diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index 1953426fd..20c97aa90 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -93,7 +93,10 @@ class Order:
         if isinstance(direction, OrderDir):
             return direction
         elif isinstance(direction, (int, float, np.integer, np.floating)):
-            return OrderDir(int(direction))
+            if direction > 0:
+                return Order.BUY
+            else:
+                return Order.SELL
         elif isinstance(direction, str):
             dl = direction.lower()
             if dl.strip() == "sell":
diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index ce2812bd0..43a6a455b 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -4,9 +4,11 @@
 
 from collections import OrderedDict
 from logging import warning
-from typing import List
-from qlib.backtest.order import BaseTradeDecision, Order
+from qlib.backtest.exchange import Exchange
+from typing import Dict, List
+from qlib.backtest.order import BaseTradeDecision, Order, OrderDir
 import pandas as pd
+import numpy as np
 import pathlib
 import warnings
 from pandas.core import groupby
@@ -221,6 +223,33 @@ class Report:
 
 
 class Indicator:
+    """
+    `Indicator` is implemented in an aggregate way.
+    All the metrics are calculated by aggregation.
+    All the metrics are calculated for a separate stock and in a specific step on a specific level.
+
+    | indicator    | desc.                                                        |
+    |--------------+--------------------------------------------------------------|
+    | amount       | the *target* amount given by the outer strategy              |
+    | inner_amount | the total *target* amount of inner strategy                  |
+    | trade_price  | the average deal price                                       |
+    | trade_value  | the total trade value                                        |
+    | trade_cost   | the total trade cost  (base price need direction)            |
+    | trade_dir    | the trading direction                                        |
+    | ffr          | full fill rate                                               |
+    | pa           | price advantage                                              |
+    | pos          | win rate                                                     |
+    | base_price   | the price of baseline                                        |
+    | base_volume  | the volume of baseline (for weighted aggregating base_price) |
+
+    **NOTE**:
+    The `base_price` and `base_volume` can't be NaN when there is no trading on that step. Otherwise,
+    the aggregation gives wrong results.
+
+    So `base_price` will not be calculated in an aggregate way!!
+
+    """
+
     def __init__(self):
         self.order_indicator_his = OrderedDict()
         self.order_indicator = OrderedDict()
@@ -241,6 +270,7 @@ class Indicator:
         trade_price = dict()
         trade_value = dict()
         trade_cost = dict()
+        trade_dir = dict()
 
         for order, _trade_val, _trade_cost, _trade_price in trade_info:
             amount[order.stock_id] = order.amount_delta
@@ -248,36 +278,32 @@ class Indicator:
             trade_price[order.stock_id] = _trade_price
             trade_value[order.stock_id] = _trade_val * order.sign
             trade_cost[order.stock_id] = _trade_cost
+            trade_dir[order.stock_id] = order.direction
 
         self.order_indicator["amount"] = self.order_indicator["inner_amount"] = pd.Series(amount)
         self.order_indicator["deal_amount"] = pd.Series(deal_amount)
+        # NOTE: trade_price and baseline price will be same on the lowest-level
         self.order_indicator["trade_price"] = pd.Series(trade_price)
         self.order_indicator["trade_value"] = pd.Series(trade_value)
         self.order_indicator["trade_cost"] = pd.Series(trade_cost)
+        self.order_indicator["trade_dir"] = pd.Series(trade_dir)
 
     def _update_order_fulfill_rate(self):
         self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"]
 
-    def _update_order_price_advantage(self, trade_exchange, trade_start_time, trade_end_time):
-        self.order_indicator["base_price"] = self.order_indicator["trade_price"]
-        instruments = list(self.order_indicator["base_price"].index)
-        self.order_indicator["volume"] = pd.Series(
-            [
-                trade_exchange.get_volume(stock_id=inst, start_time=trade_start_time, end_time=trade_end_time)
-                for inst in instruments
-            ],
-            index=instruments,
-        )
-        self.order_indicator["pa"] = (
-            self.order_indicator["trade_price"] - self.order_indicator["base_price"]
-        ) / self.order_indicator["base_price"]
+    def _update_order_price_advantage(self):
+        # NOTE:
+        # trade_price and baseline price will be same on the lowest-level
+        # So Pa should be 0
+        self.order_indicator["pa"] = 0
 
-    def _agg_order_trade_info(self, inner_order_indicators):
+    def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]):
         inner_amount = pd.Series()
         deal_amount = pd.Series()
         trade_price = pd.Series()
         trade_value = pd.Series()
         trade_cost = pd.Series()
+        trade_dir = pd.Series()
         for _order_indicator in inner_order_indicators:
             inner_amount = inner_amount.add(_order_indicator["inner_amount"], fill_value=0)
             deal_amount = deal_amount.add(_order_indicator["deal_amount"], fill_value=0)
@@ -286,6 +312,9 @@ class Indicator:
             )
             trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0)
             trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0)
+            trade_dir = trade_dir.add(_order_indicator["trade_dir"])
+
+        trade_dir = trade_dir.apply(Order.parse_dir)
 
         self.order_indicator["inner_amount"] = inner_amount
         self.order_indicator["deal_amount"] = deal_amount
@@ -293,6 +322,7 @@ class Indicator:
         self.order_indicator["trade_price"] = trade_price
         self.order_indicator["trade_value"] = trade_value
         self.order_indicator["trade_cost"] = trade_cost
+        self.order_indicator["trade_dir"] = trade_dir
 
     def _update_trade_amount(self, outer_trade_decision: BaseTradeDecision):
         # NOTE: these indicator is designed for order execution, so the
@@ -305,34 +335,59 @@ class Indicator:
     def _agg_order_fulfill_rate(self):
         self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"]
 
-    def _agg_order_price_advantage(self, inner_order_indicators, base_price="twap"):
-        base_price = base_price.lower()
-        volume = pd.Series()
-        for _order_indicator in inner_order_indicators:
-            volume = volume.add(_order_indicator["volume"], fill_value=0)
-        self.order_indicator["volume"] = volume
+    def _agg_order_price_advantage(
+        self,
+        inner_order_indicators: List[Dict[str, pd.Series]],
+        trade_start_time: pd.Timestamp,
+        trade_end_time: pd.Timestamp,
+        trade_exchange: Exchange,
+        pa_config: dict = {},
+    ):
+        """
 
-        if base_price == "twap":
-            base_price = pd.Series()
-            price_count = pd.Series()
-            for _order_indicator in inner_order_indicators:
-                base_price = base_price.add(_order_indicator["base_price"], fill_value=0)
-                price_count = price_count.add(pd.Series(1, index=_order_indicator["base_price"].index), fill_value=0)
-            base_price /= price_count
-            self.order_indicator["base_price"] = base_price
+        Parameters
+        ----------
+        inner_order_indicators : List[Dict[str, pd.Series]]
+            the indicators of account of inner executor
+        trade_start_time : pd.Timestamp
+            the start_time of the trade period, for slicing
+        trade_end_time : pd.Timestamp
+            the end_time of the trade period, for slicing (so it may include more time at the end)
+        trade_exchange : Exchange
+            for retrieving trading price
+        pa_config : dict
+            For example
+            {
+                "agg": "twap",  # "vwap"
+                "price": "$close",  # TODO: this is not supported now!!!!!
+                                    # default to use deal price of the exchange
+            }
+        """
 
-        elif base_price == "vwap":
-            base_price = pd.Series()
-            for _order_indicator in inner_order_indicators:
-                base_price = base_price.add(_order_indicator["base_price"] * _order_indicator["volume"], fill_value=0)
-            base_price /= self.order_indicator["volume"]
-            self.order_indicator["base_price"] = base_price
+        agg = pa_config.get("agg", "twap").lower()
+        price = pa_config.get("price", "deal_price").lower()
 
-        else:
-            raise ValueError(f"base_price {base_price} is not supported!")
+        base_price = {}
+        for inst, dir in self.order_indicator["trade_dir"].items():
 
-        self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1
-        # print("trade_price", self.order_indicator["trade_price"], "base_price", self.order_indicator["base_price"], "pa", self.order_indicator["pa"]* (2 * (self.order_indicator["amount"] < 0).astype(int) - 1))
+            if price == "deal_price":
+                price_s = trade_exchange.get_deal_price(inst, trade_start_time, trade_end_time, dir, method=None)
+            else:
+                raise NotImplementedError(f"This type of input is not supported")
+
+            # there are some zeros in the trading price. These cases are known meaningless
+            price_s = price_s.mask(np.isclose(price_s, 0))
+
+            if agg == "vwap":
+                volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None)
+                base_price[inst] = ((price_s * volume_s).sum() / volume_s.sum()).item()
+            elif agg == "twap":
+                base_price[inst] = price_s.mean().item()
+
+        base_price = pd.Series(base_price)
+
+        # update PA
+        self.order_indicator["pa"] = self.order_indicator["trade_price"] / base_price - 1
 
     def _cal_trade_fulfill_rate(self, method="mean"):
         if method == "mean":
@@ -372,19 +427,27 @@ class Indicator:
     def _cal_trade_order_count(self):
         return self.order_indicator["amount"].count()
 
-    def update_order_indicators(self, trade_start_time, trade_end_time, trade_info, trade_exchange):
+    def update_order_indicators(self, trade_info: list):
         self._update_order_trade_info(trade_info=trade_info)
         self._update_order_fulfill_rate()
-        self._update_order_price_advantage(trade_exchange, trade_start_time, trade_end_time)
+        self._update_order_price_advantage()
 
     def agg_order_indicators(
-        self, inner_order_indicators, outer_trade_decision: BaseTradeDecision, indicator_config={}
+        self,
+        trade_start_time,
+        trade_end_time,
+        inner_order_indicators: List[Dict[str, pd.Series]],
+        outer_trade_decision: BaseTradeDecision,
+        trade_exchange: Exchange,
+        indicator_config={},
     ):
         self._agg_order_trade_info(inner_order_indicators)
         self._update_trade_amount(outer_trade_decision)
         self._agg_order_fulfill_rate()
         pa_config = indicator_config.get("pa_config", {})
-        self._agg_order_price_advantage(inner_order_indicators, base_price=pa_config.get("base_price", "twap"))
+        self._agg_order_price_advantage(
+            inner_order_indicators, trade_start_time, trade_end_time, trade_exchange, pa_config=pa_config
+        )
 
     def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}):
         show_indicator = indicator_config.get("show_indicator", False)

From eada8640b9d8f9e81fad9244c692853a62789c8c Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Thu, 8 Jul 2021 13:37:20 +0000
Subject: [PATCH 23/28] align range limit

---
 qlib/backtest/__init__.py              |   2 +-
 qlib/backtest/account.py               |  19 +-
 qlib/backtest/backtest.py              |   5 +-
 qlib/backtest/executor.py              | 275 +++++++++++++++----------
 qlib/backtest/order.py                 |  64 ++++--
 qlib/backtest/report.py                | 200 ++++++++++++------
 qlib/backtest/utils.py                 |  64 +++++-
 qlib/contrib/strategy/rule_strategy.py |  26 +--
 qlib/strategy/base.py                  |  15 +-
 9 files changed, 438 insertions(+), 232 deletions(-)

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index fa57e354b..ab3d29408 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -13,7 +13,7 @@ from .executor import BaseExecutor
 from .backtest import backtest_loop
 from .backtest import collect_data_loop
 from .order import Order
-from .utils import CommonInfrastructure, TradeCalendarManager
+from .utils import CommonInfrastructure, LevelInfrastructure, TradeCalendarManager
 from ..utils import init_instance_by_config
 from ..log import get_module_logger
 from ..config import C
diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index 67f7b056a..3ef1cdd03 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -3,7 +3,7 @@
 
 
 import copy
-from typing import Dict, List
+from typing import Dict, List, Tuple
 from qlib.utils import init_instance_by_config
 import warnings
 import pandas as pd
@@ -250,6 +250,7 @@ class Account:
         outer_trade_decision: BaseTradeDecision,
         trade_info: list = None,
         inner_order_indicators: List[Dict[str, pd.Series]] = None,
+        decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]] = None,
         indicator_config: dict = {},
     ):
         """update account at each trading bar step
@@ -274,6 +275,9 @@ class Account:
             indicators of inner executor, by default None
             - necessary if atomic is False
             - used to aggregate outer indicators
+        decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]] = None,
+            The decision list of the inner level: List[Tuple[<decision>, <start_time>, <end_time>]]
+            The inner level
         indicator_config : dict, optional
             config of calculating indicators, by default {}
         """
@@ -289,22 +293,27 @@ class Account:
             # report is portfolio related analysis
             self.update_report(trade_start_time, trade_end_time)
 
-        # indicator is trading (e.g. high-frequency order execution) related analysis
-        self.indicator.clear()
+        # TODO: will skip empty decisions make it faster?  `outer_trade_decision.empty():`
 
+        # indicator is trading (e.g. high-frequency order execution) related analysis
+        self.indicator.reset()
+
+        # aggregate the information for each order
         if atomic:
             self.indicator.update_order_indicators(trade_info)
         else:
             self.indicator.agg_order_indicators(
-                trade_start_time,
-                trade_end_time,
                 inner_order_indicators,
+                decision_list=decision_list,
                 outer_trade_decision=outer_trade_decision,
                 trade_exchange=trade_exchange,
                 indicator_config=indicator_config,
             )
 
+        # aggregate all the order metrics of a single step
         self.indicator.cal_trade_indicators(trade_start_time, self.freq, indicator_config)
+
+        # record the metrics
         self.indicator.record(trade_start_time)
 
     def get_report(self):
diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py
index 573c874b0..89b8c7830 100644
--- a/qlib/backtest/backtest.py
+++ b/qlib/backtest/backtest.py
@@ -55,14 +55,13 @@ def collect_data_loop(
         trade decision
     """
     trade_executor.reset(start_time=start_time, end_time=end_time)
-    level_infra = trade_executor.get_level_infra()
-    trade_strategy.reset(level_infra=level_infra)
+    trade_strategy.reset(level_infra=trade_executor.get_level_infra())
 
     with tqdm(total=trade_executor.trade_calendar.get_trade_len(), desc="backtest loop") as bar:
         _execute_result = None
         while not trade_executor.finished():
             _trade_decision: BaseTradeDecision = trade_strategy.generate_trade_decision(_execute_result)
-            _execute_result = yield from trade_executor.collect_data(_trade_decision)
+            _execute_result = yield from trade_executor.collect_data(_trade_decision, level=0)
             bar.update(1)
 
     if return_value is not None:
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index c4807ebde..b99380c54 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -1,13 +1,16 @@
+from abc import abstractclassmethod, abstractmethod
 import copy
+from types import GeneratorType
+from qlib.backtest.account import Account
 import warnings
 import pandas as pd
-from typing import List, Union
+from typing import List, Tuple, Union
 
 from qlib.backtest.report import Indicator
 
-from .order import Order, BaseTradeDecision
+from .order import EmptyTradeDecision, Order, BaseTradeDecision
 from .exchange import Exchange
-from .utils import TradeCalendarManager, CommonInfrastructure, LevelInfrastructure
+from .utils import TradeCalendarManager, CommonInfrastructure, LevelInfrastructure, get_start_end_idx
 
 from ..utils import init_instance_by_config
 from ..utils.time import Freq
@@ -26,6 +29,7 @@ class BaseExecutor:
         generate_report: bool = False,
         verbose: bool = False,
         track_data: bool = False,
+        trade_exchange: Exchange = None,
         common_infra: CommonInfrastructure = None,
         **kwargs,
     ):
@@ -62,8 +66,8 @@ class BaseExecutor:
                 {
                     'show_indicator': True,
                     'pa_config': {
-                        'base_value': 'twap',
-                        'weight_method': 'value_weighted',
+                        "agg": "twap",  # "vwap"
+                        "price": "$close", # default to use deal price of the exchange
                     },
                     'ffr_config':{
                         'weight_method': 'value_weighted',
@@ -77,6 +81,12 @@ class BaseExecutor:
             whether to generate trade_decision, will be used when training rl agent
             - If `self.track_data` is true, when making data for training, the input `trade_decision` of `execute` will be generated by `collect_data`
             - Else,  `trade_decision` will not be generated
+
+        trade_exchange : Exchange
+            exchange that provides market info, used to generate report
+            - If generate_report is None, trade_exchange will be ignored
+            - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra
+
         common_infra : CommonInfrastructure, optional:
             common infrastructure for backtesting, may including:
             - trade_account : Account, optional
@@ -90,7 +100,9 @@ class BaseExecutor:
         self.generate_report = generate_report
         self.verbose = verbose
         self.track_data = track_data
-        self.reset(start_time=start_time, end_time=end_time, track_data=track_data, common_infra=common_infra)
+        self._trade_exchange = trade_exchange
+        self.level_infra = LevelInfrastructure()
+        self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra)
 
     def reset_common_infra(self, common_infra):
         """
@@ -105,60 +117,106 @@ class BaseExecutor:
         if common_infra.has("trade_account"):
             # NOTE: there is a trick in the code.
             # copy is used instead of deepcopy. So positions are shared
-            self.trade_account = copy.copy(common_infra.get("trade_account"))
+            self.trade_account: Account = copy.copy(common_infra.get("trade_account"))
             self.trade_account.reset(freq=self.time_per_step, init_report=True, port_metr_enabled=self.generate_report)
 
-    def reset(self, track_data: bool = None, common_infra: CommonInfrastructure = None, **kwargs):
+    @property
+    def trade_exchange(self) -> Exchange:
+        """get trade exchange in a prioritized order"""
+        return getattr(self, "_trade_exchange", None) or self.common_infra.get("trade_exchange")
+
+    @property
+    def trade_calendar(self) -> TradeCalendarManager:
+        """
+        Though the trade calendar can be accessed from multiple sources, managing it in a centralized way will make the
+        code easier
+        """
+        return self.level_infra.get("trade_calendar")
+
+    def reset(self, common_infra: CommonInfrastructure = None, **kwargs):
         """
         - reset `start_time` and `end_time`, used in trade calendar
-        - reset `track_data`, used when making data for multi-level training
         - reset `common_infra`, used to reset `trade_account`, `trade_exchange`, .etc
         """
 
-        if track_data is not None:
-            self.track_data = track_data
-
         if "start_time" in kwargs or "end_time" in kwargs:
             start_time = kwargs.get("start_time")
             end_time = kwargs.get("end_time")
-            self.trade_calendar = TradeCalendarManager(
-                freq=self.time_per_step, start_time=start_time, end_time=end_time
-            )
-
+            self.level_infra.reset_cal(freq=self.time_per_step, start_time=start_time, end_time=end_time)
         if common_infra is not None:
             self.reset_common_infra(common_infra)
 
     def get_level_infra(self):
-        return LevelInfrastructure(trade_calendar=self.trade_calendar)
+        return self.level_infra
 
     def finished(self):
         return self.trade_calendar.finished()
 
-    def execute(self, trade_decision):
+    def execute(self, trade_decision: BaseTradeDecision, level: int = 0):
         """execute the trade decision and return the executed result
 
+        NOTE: this function is never used directly in the framework. Should we delete it?
+
         Parameters
         ----------
         trade_decision : BaseTradeDecision
 
+        level : int
+            the level of current executor
+
         Returns
         ----------
         execute_result : List[object]
             the executed result for trade decision
         """
-        raise NotImplementedError("execute is not implemented!")
+        return_value = {}
+        for _decision in self.collect_data(trade_decision, return_value=return_value, level=level):
+            pass
+        return return_value.get("execute_result")
 
-    def collect_data(self, trade_decision):
+    @abstractclassmethod
+    def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0) -> Tuple[List[object], dict]:
+        """
+        Please refer to the doc of collect_data
+        The only difference between `_collect_data` and `collect_data` is that some common steps are moved into
+        collect_data
+
+        Parameters
+        ----------
+        Please refer to the doc of collect_data
+
+
+        Returns
+        -------
+        Tuple[List[object], dict]:
+            (<the executed result for trade decision>, <the extra kwargs for `self.trade_account.update_bar_end`>)
+        """
+
+    def collect_data(
+        self, trade_decision: BaseTradeDecision, return_value: dict = None, level: int = 0
+    ) -> List[object]:
         """Generator for collecting the trade decision data for rl training
 
+        This function will make a step forward
+
         Parameters
         ----------
         trade_decision : BaseTradeDecision
 
+        level : int
+            the level of current executor. 0 indicates the top level
+
+        return_value : dict
+            the mem address to return the value
+            e.g.  {"return_value": <the executed result>}
+
         Returns
         ----------
         execute_result : List[object]
-            the executed result for trade decision
+            the executed result for trade decision.
+            ** NOTE!!!! **:
+            1) This is necessary. The return value of the generator will be used in NestedExecutor
+            2) Please note the executed results are not merged.
 
         Yields
         -------
@@ -167,7 +225,36 @@ class BaseExecutor:
         """
         if self.track_data:
             yield trade_decision
-        return self.execute(trade_decision)
+
+        atomic = not issubclass(self.__class__, NestedExecutor)  #  issubclass(A, A) is True
+
+        if atomic and trade_decision.get_range_limit(default_value=None) is not None:
+            raise ValueError("atomic executor doesn't support specify `range_limit`")
+
+        obj = self._collect_data(trade_decision=trade_decision, level=level)
+
+        if isinstance(obj, GeneratorType):
+            res, kwargs = yield from obj
+        else:
+            # Some concrete executor don't have inner decisions
+            res, kwargs = obj
+
+        trade_start_time, trade_end_time = self.trade_calendar.get_cur_step_time()
+        # Account will not be changed in this function
+        self.trade_account.update_bar_end(
+            trade_start_time,
+            trade_end_time,
+            self.trade_exchange,
+            atomic=atomic,
+            outer_trade_decision=trade_decision,
+            indicator_config=self.indicator_config,
+            **kwargs,
+        )
+
+        self.trade_calendar.step()
+        if return_value is not None:
+            return_value.update({"execute_result": res})
+        return res
 
     def get_all_executors(self):
         """get all executors"""
@@ -192,7 +279,7 @@ class NestedExecutor(BaseExecutor):
         verbose: bool = False,
         track_data: bool = False,
         skip_empty_decision: bool = True,
-        trade_exchange: Exchange = None,
+        align_range_limit: bool = True,
         common_infra: CommonInfrastructure = None,
         **kwargs,
     ):
@@ -203,24 +290,24 @@ class NestedExecutor(BaseExecutor):
             trading env in each trading bar.
         inner_strategy : BaseStrategy
             trading strategy in each trading bar
-        trade_exchange : Exchange
-            exchange that provides market info, used to generate report
-            - If generate_report is None, trade_exchange will be ignored
-            - Else If `trade_exchange` is None, self.trade_exchange will be set with common_infra
         skip_empty_decision: bool
-            Will the executor skip the inner loop when the decision is empty.
+            Whether the executor will skip calling the inner loop when the decision is empty.
             It should be False in following cases
             - The decisions may be updated by steps
             - The inner executor may not follow the decisions from the outer strategy
+        align_range_limit: bool
+            force to align the index_range decision
+            It is only for nested executor, because range_limit is given by outer strategy
         """
-        self.inner_executor = init_instance_by_config(
+        self.inner_executor: BaseExecutor = init_instance_by_config(
             inner_executor, common_infra=common_infra, accept_types=BaseExecutor
         )
-        self.inner_strategy = init_instance_by_config(
+        self.inner_strategy: BaseStrategy = init_instance_by_config(
             inner_strategy, common_infra=common_infra, accept_types=BaseStrategy
         )
 
         self._skip_empty_decision = skip_empty_decision
+        self._align_range_limit = align_range_limit
 
         super(NestedExecutor, self).__init__(
             time_per_step=time_per_step,
@@ -234,82 +321,82 @@ class NestedExecutor(BaseExecutor):
             **kwargs,
         )
 
-        if trade_exchange is not None:
-            self.trade_exchange = trade_exchange
-
     def reset_common_infra(self, common_infra):
         """
         reset infrastructure for trading
-            - reset trade_exchange
             - reset inner_strategyand inner_executor common infra
         """
         super(NestedExecutor, self).reset_common_infra(common_infra)
 
-        if common_infra.has("trade_exchange"):
-            self.trade_exchange = common_infra.get("trade_exchange")
-
         self.inner_executor.reset_common_infra(common_infra)
         self.inner_strategy.reset_common_infra(common_infra)
 
     def _init_sub_trading(self, trade_decision):
-        trade_step = self.trade_calendar.get_trade_step()
-        trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
+        trade_start_time, trade_end_time = self.trade_calendar.get_cur_step_time()
         self.inner_executor.reset(start_time=trade_start_time, end_time=trade_end_time)
         sub_level_infra = self.inner_executor.get_level_infra()
+        self.level_infra.set_sub_level_infra(sub_level_infra)
         self.inner_strategy.reset(level_infra=sub_level_infra, outer_trade_decision=trade_decision)
 
-    def execute(self, trade_decision):
-        return_value = {}
-        for _decision in self.collect_data(trade_decision, return_value):
-            pass
-        return return_value.get("execute_result")
+    def _update_trade_decision(self, trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        # the outer strategy has a chance to update the decision at each iteration
+        updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar)
+        if updated_trade_decision is not None:
+            trade_decision = updated_trade_decision
+            # NEW UPDATE
+            # create a hook for the inner strategy to update the outer decision
+            self.inner_strategy.alter_outer_trade_decision(trade_decision)
+        return trade_decision
 
-    def collect_data(self, trade_decision: BaseTradeDecision, return_value=None):
-        if self.track_data:
-            yield trade_decision
+    # def _get_inner_trade_decision(self, outer_trade_decision: BaseTradeDecision, inner_execute_result):
+    #     # In some cases, the inner strategy can be skipped, but the inner executor should keep running
+    #     if outer_trade_decision.empty() and self._skip_empty_decision:
+    #         return EmptyTradeDecision(self.inner_strategy)
+    #     return self.inner_strategy.generate_trade_decision(inner_execute_result)
+    # _inner_trade_decision = self._get_inner_trade_decision(trade_decision, _inner_execute_result)
+
+    def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0):
         execute_result = []
         inner_order_indicators = []
+        decision_list = []
+        # NOTE:
+        # - this is necessary for calculating the steps in the sub level
+        # - more detailed information will be set into trade decision
+        self._init_sub_trading(trade_decision)
 
-        if not (trade_decision.empty() and self._skip_empty_decision):
-            _inner_execute_result = None
-            self._init_sub_trading(trade_decision)
-            while not self.inner_executor.finished():
-                # outter strategy have chance to update decision each iterator
-                updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar)
-                if updated_trade_decision is not None:
-                    trade_decision = updated_trade_decision
-                    # NEW UPDATE
-                    # create a hook for inner strategy to update outter decision
-                    self.inner_strategy.alter_outer_trade_decision(trade_decision)
+        _inner_execute_result = None
+        while not self.inner_executor.finished():
+            trade_decision = self._update_trade_decision(trade_decision)
+
+            if trade_decision.empty() and self._skip_empty_decision:
+                # give the outer strategy one chance to update the decision
+                # - For updating some information in the sub executor(the strategy have no knowledge of the inner
+                # executor when generating the decision)
+                break
+
+            sub_cal: TradeCalendarManager = self.inner_executor.trade_calendar
+            start_idx, end_idx = get_start_end_idx(sub_cal, trade_decision)
+            if not self._align_range_limit or start_idx <= sub_cal.get_trade_step() <= end_idx:
+                # if force align the range limit, skip the steps outside the decision range limit
 
                 _inner_trade_decision = self.inner_strategy.generate_trade_decision(_inner_execute_result)
+                # NOTE sub_cal.get_cur_step_time() must be called before collect_data in case of step shifting
+                decision_list.append((_inner_trade_decision, *sub_cal.get_cur_step_time()))
 
                 # NOTE: Trade Calendar will step forward in the follow line
                 _inner_execute_result = yield from self.inner_executor.collect_data(
-                    trade_decision=_inner_trade_decision
+                    trade_decision=_inner_trade_decision, level=level + 1
                 )
-
                 execute_result.extend(_inner_execute_result)
+
                 inner_order_indicators.append(
                     self.inner_executor.trade_account.get_trade_indicator().get_order_indicator()
                 )
+            else:
+                # do nothing and just step forward
+                sub_cal.step()
 
-        trade_step = self.trade_calendar.get_trade_step()
-        trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
-        self.trade_account.update_bar_end(
-            trade_start_time,
-            trade_end_time,
-            self.trade_exchange,
-            atomic=False,
-            outer_trade_decision=trade_decision,
-            inner_order_indicators=inner_order_indicators,
-            indicator_config=self.indicator_config,
-        )
-
-        self.trade_calendar.step()
-        if return_value is not None:
-            return_value.update({"execute_result": execute_result})
-        return execute_result
+        return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
 
     def get_all_executors(self):
         """get all executors, including self and inner_executor.get_all_executors()"""
@@ -337,17 +424,13 @@ class SimulatorExecutor(BaseExecutor):
         generate_report: bool = False,
         verbose: bool = False,
         track_data: bool = False,
-        trade_exchange: Exchange = None,
         common_infra: CommonInfrastructure = None,
-        trade_type: str = TT_PARAL,
+        trade_type: str = TT_SERIAL,
         **kwargs,
     ):
         """
         Parameters
         ----------
-        trade_exchange : Exchange
-            exchange that provides market info, used to deal order and generate report
-            - If `trade_exchange` is None, self.trade_exchange will be set with common_infra
         trade_type: str
             please refer to the doc of `TT_SERIAL` & `TT_PARAL`
         """
@@ -362,20 +445,9 @@ class SimulatorExecutor(BaseExecutor):
             common_infra=common_infra,
             **kwargs,
         )
-        if trade_exchange is not None:
-            self.trade_exchange = trade_exchange
 
         self.trade_type = trade_type
 
-    def reset_common_infra(self, common_infra):
-        """
-        reset infrastructure for trading
-            - reset trade_exchange
-        """
-        super(SimulatorExecutor, self).reset_common_infra(common_infra)
-        if common_infra.has("trade_exchange"):
-            self.trade_exchange = common_infra.get("trade_exchange")
-
     def _get_order_iterator(self, trade_decision: BaseTradeDecision) -> List[Order]:
         """
 
@@ -405,10 +477,9 @@ class SimulatorExecutor(BaseExecutor):
             raise NotImplementedError(f"This type of input is not supported")
         return order_it
 
-    def execute(self, trade_decision: BaseTradeDecision):
+    def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0):
 
-        trade_step = self.trade_calendar.get_trade_step()
-        trade_start_time, trade_end_time = self.trade_calendar.get_step_time(trade_step)
+        trade_start_time, _ = self.trade_calendar.get_cur_step_time()
         execute_result = []
 
         for order in self._get_order_iterator(trade_decision):
@@ -450,16 +521,4 @@ class SimulatorExecutor(BaseExecutor):
                     print("[W {:%Y-%m-%d %H:%M:%S}]: {} wrong.".format(trade_start_time, order.stock_id))
                 # do nothing
                 pass
-
-        # Account will not be changed in this function
-        self.trade_account.update_bar_end(
-            trade_start_time,
-            trade_end_time,
-            self.trade_exchange,
-            atomic=True,
-            outer_trade_decision=trade_decision,
-            trade_info=execute_result,
-            indicator_config=self.indicator_config,
-        )
-        self.trade_calendar.step()
-        return execute_result
+        return execute_result, {"trade_info": execute_result}
diff --git a/qlib/backtest/order.py b/qlib/backtest/order.py
index 20c97aa90..1a88ded93 100644
--- a/qlib/backtest/order.py
+++ b/qlib/backtest/order.py
@@ -3,6 +3,7 @@
 # TODO: rename it with decision.py
 from __future__ import annotations
 from enum import IntEnum
+from qlib.log import get_module_logger
 
 # try to fix circular imports when enabling type hints
 from typing import TYPE_CHECKING
@@ -179,7 +180,7 @@ class BaseTradeDecision:
         2. Same as `case 1.3`
     """
 
-    def __init__(self, strategy: BaseStrategy):
+    def __init__(self, strategy: BaseStrategy, idx_range: Tuple[int, int] = None):
         """
         Parameters
         ----------
@@ -187,6 +188,8 @@ class BaseTradeDecision:
             The strategy who make the decision
         """
         self.strategy = strategy
+        self.total_step = None  # upper strategy has no knowledge about the sub executor before `_init_sub_trading`
+        self.idx_range = idx_range
 
     def get_decision(self) -> List[object]:
         """
@@ -207,7 +210,11 @@ class BaseTradeDecision:
 
     def update(self, trade_calendar: TradeCalendarManager) -> Union["BaseTradeDecision", None]:
         """
-        Be called at the **start** of each step
+        Be called at the **start** of each step.
+
+        This function is designed for the following purposes
+        1) Update some information from the inner executor calendar
+        2) Leave a hook for the strategy who made the `self` decision to update the decision itself
 
         Parameters
         ----------
@@ -221,13 +228,27 @@ class BaseTradeDecision:
         BaseTradeDecision:
             New update, use new decision
         """
+        # purpose 1)
+        self.total_step = trade_calendar.get_trade_len()
+        if self.idx_range is not None:
+            logger = get_module_logger("decision")
+            start_idx, end_idx = self.idx_range
+            if start_idx < 0 or end_idx >= self.total_step:
+                logger.warning(f"{self.idx_range} go beyound the total_step({self.total_step}), it will be clipped")
+                self.idx_range = max(0, start_idx), min(self.total_step - 1, end_idx)
+
+        # purpose 2)
         return self.strategy.update_trade_decision(self, trade_calendar)
 
-    def get_range_limit(self) -> Tuple[int, int]:
+    def get_range_limit(self, **kwargs) -> Tuple[int, int]:
         """
         return the expected step range for limiting the decision execution time
         Both left and right are **closed**
 
+        **kwargs:
+            {"default_value": <default_value>}
+            # a dict is used to distinguish "no value provided" from "None provided"
+
         Returns
         -------
         Tuple[int, int]:
@@ -235,12 +256,32 @@ class BaseTradeDecision:
         Raises
         ------
         NotImplementedError:
-            If the decision can't provide a unified start and end
+            If both of the following criteria are met
+            1) the decision can't provide a unified start and end
+            2) no `default_value` is provided
         """
-        raise NotImplementedError(f"Please implement the `func` method")
+        if self.idx_range is None:
+            if "default_value" in kwargs:
+                return kwargs["default_value"]
+            else:
+                # Default to get full index
+                raise NotImplementedError(f"The decision didn't provide an index range")
+        return self.idx_range
 
     def empty(self) -> bool:
-        return len(self.get_decision()) == 0
+        for obj in self.get_decision():
+            if isinstance(obj, Order):
+                # Zero amount order will be treated as empty
+                if not np.isclose(obj.amount, 0.0):
+                    return False
+            else:
+                return True
+        return True
+
+
+class EmptyTradeDecision(BaseTradeDecision):
+    def empty(self) -> bool:
+        return True
 
 
 class TradeDecisionWO(BaseTradeDecision):
@@ -249,16 +290,9 @@ class TradeDecisionWO(BaseTradeDecision):
     Besides, the time_range is also included.
     """
 
-    def __init__(self, order_list: List[Order], strategy: BaseStrategy, idx_range: Tuple = None):
-        super().__init__(strategy)
+    def __init__(self, order_list: List[Order], strategy: BaseStrategy, idx_range: Tuple[int, int] = None):
+        super().__init__(strategy, idx_range=idx_range)
         self.order_list = order_list
-        self.idx_range = idx_range
-
-    def get_range_limit(self) -> Tuple[int, int]:
-        if self.idx_range is None:
-            # Default to get full index
-            raise NotImplementedError(f"The decision didn't provide an index range")
-        return self.idx_range
 
     def get_decision(self) -> List[object]:
         return self.order_list
diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index 43a6a455b..138a44faa 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -4,21 +4,23 @@
 
 from collections import OrderedDict
 from logging import warning
-from qlib.backtest.exchange import Exchange
-from typing import Dict, List
-from qlib.backtest.order import BaseTradeDecision, Order, OrderDir
-import pandas as pd
-import numpy as np
 import pathlib
+from typing import Dict, List, Tuple
 import warnings
-from pandas.core import groupby
 
+import numpy as np
+import pandas as pd
+from pandas.core import groupby
 from pandas.core.frame import DataFrame
 
-from ..utils.time import Freq
-from ..utils.resam import resam_ts_data, get_higher_eq_freq_feature
+from qlib.backtest.exchange import Exchange
+from qlib.backtest.order import BaseTradeDecision, Order, OrderDir
+from qlib.backtest.utils import TradeCalendarManager
+
 from ..data import D
 from ..tests.config import CSI300_BENCH
+from ..utils.resam import get_higher_eq_freq_feature, resam_ts_data
+from ..utils.time import Freq
 
 
 class Report:
@@ -251,14 +253,21 @@ class Indicator:
     """
 
     def __init__(self):
+        # order indicator is metrics for a single order for a specific step
         self.order_indicator_his = OrderedDict()
-        self.order_indicator = OrderedDict()
-        self.trade_indicator_his = OrderedDict()
-        self.trade_indicator = OrderedDict()
+        self.order_indicator: Dict[str, pd.Series] = OrderedDict()
 
-    def clear(self):
+        # trade indicator is metrics for all orders for a specific step
+        self.trade_indicator_his = OrderedDict()
+        self.trade_indicator: Dict[str, float] = OrderedDict()
+
+        self._trade_calendar = None
+
+    # def reset(self, trade_calendar: TradeCalendarManager):
+    def reset(self):
         self.order_indicator = OrderedDict()
         self.trade_indicator = OrderedDict()
+        # self._trade_calendar = trade_calendar
 
     def record(self, trade_start_time):
         self.order_indicator_his[trade_start_time] = self.order_indicator
@@ -294,9 +303,14 @@ class Indicator:
     def _update_order_price_advantage(self):
         # NOTE:
         # trade_price and baseline price will be same on the lowest-level
-        # So Pa should be 0
+        # So Pa should be 0 or do nothing
         self.order_indicator["pa"] = 0
 
+    def update_order_indicators(self, trade_info: list):
+        self._update_order_trade_info(trade_info=trade_info)
+        self._update_order_fulfill_rate()
+        self._update_order_price_advantage()
+
     def _agg_order_trade_info(self, inner_order_indicators: List[Dict[str, pd.Series]]):
         inner_amount = pd.Series()
         deal_amount = pd.Series()
@@ -312,7 +326,7 @@ class Indicator:
             )
             trade_value = trade_value.add(_order_indicator["trade_value"], fill_value=0)
             trade_cost = trade_cost.add(_order_indicator["trade_cost"], fill_value=0)
-            trade_dir = trade_dir.add(_order_indicator["trade_dir"])
+            trade_dir = trade_dir.add(_order_indicator["trade_dir"], fill_value=0)
 
         trade_dir = trade_dir.apply(Order.parse_dir)
 
@@ -335,24 +349,77 @@ class Indicator:
     def _agg_order_fulfill_rate(self):
         self.order_indicator["ffr"] = self.order_indicator["deal_amount"] / self.order_indicator["amount"]
 
-    def _agg_order_price_advantage(
+    def _get_base_vol_pri(
         self,
-        inner_order_indicators: List[Dict[str, pd.Series]],
+        inst: str,
         trade_start_time: pd.Timestamp,
         trade_end_time: pd.Timestamp,
+        direction: OrderDir,
+        decision: BaseTradeDecision,
+        trade_exchange: Exchange,
+        pa_config: dict = {},
+    ):
+        """Get the base volume and price information"""
+
+        agg = pa_config.get("agg", "twap").lower()
+        price = pa_config.get("price", "deal_price").lower()
+
+        if price == "deal_price":
+            price_s = trade_exchange.get_deal_price(
+                inst, trade_start_time, trade_end_time, direction=direction, method=None
+            )
+        else:
+            raise NotImplementedError(f"This type of input is not supported")
+
+        # NOTE: there are some zeros in the trading price. These cases are known meaningless
+        # for aligning the previous logic, remove it.
+        # price_s = price_s.mask(np.isclose(price_s, 0))
+
+        if agg == "vwap":
+            volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None)
+        elif agg == "twap":
+            volume_s = pd.Series(1, index=price_s.index)
+        else:
+            raise NotImplementedError(f"This type of input is not supported")
+
+        # no sub executor on the lowest level
+        # So range_limit and total_step will both be None
+        total_step = decision.total_step
+        if total_step is None:
+            total_step = 1
+        range_limit = decision.get_range_limit(default_value=(0, total_step - 1))
+
+        assert volume_s.shape[0] % total_step == 0, "The price series can't  be divided by step length"
+        factor = volume_s.shape[0] // total_step
+
+        slc = slice(range_limit[0] * factor, (range_limit[1] + 1) * factor)
+
+        volume_s = volume_s.iloc[slc]
+        price_s = price_s.iloc[slc]
+
+        base_volume = volume_s.sum().item()
+        base_price = ((price_s * volume_s).sum() / base_volume).item()
+
+        return base_price, base_volume
+
+    def _agg_base_price(
+        self,
+        inner_order_indicators: List[Dict[str, pd.Series]],
+        decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]],
         trade_exchange: Exchange,
         pa_config: dict = {},
     ):
         """
+        # NOTE:!!!!
+        # Strong assumption!!!!!!
+        # the correctness of the base_price relies on that the **same** exchange is used
 
         Parameters
         ----------
         inner_order_indicators : List[Dict[str, pd.Series]]
             the indicators of account of inner executor
-        trade_start_time : pd.Timestamp
-            the start_time of the trade period, for slicing
-        trade_end_time : pd.Timestamp
-            the end_time of the trade period, for slicing (so it may include more time at the end)
+        decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]],
+            a list of (decision, start_time, end_time) tuples, one per entry of inner_order_indicators
         trade_exchange : Exchange
             for retrieving trading price
         pa_config : dict
@@ -362,32 +429,61 @@ class Indicator:
                 "price": "$close",  # TODO: this is not supported now!!!!!
                                     # default to use deal price of the exchange
             }
+
         """
 
-        agg = pa_config.get("agg", "twap").lower()
-        price = pa_config.get("price", "deal_price").lower()
+        # TODO: I think there are potentials to be optimized
+        trade_dir = self.order_indicator["trade_dir"]
+        if len(trade_dir) > 0:
+            bp_all, bv_all = [], []
+            # <step, inst, (base_volume | base_price)>
+            for oi, (dec, start, end) in zip(inner_order_indicators, decision_list):
+                bp_s = oi.get("base_price", pd.Series()).reindex(trade_dir.index)
+                bv_s = oi.get("base_volume", pd.Series()).reindex(trade_dir.index)
+                bp_new, bv_new = {}, {}
+                for pr, v, (inst, direction) in zip(bp_s.values, bv_s.values, trade_dir.items()):
+                    if np.isnan(pr):
+                        bp_new[inst], bv_new[inst] = self._get_base_vol_pri(
+                            inst,
+                            start,
+                            end,
+                            decision=dec,
+                            direction=direction,
+                            trade_exchange=trade_exchange,
+                            pa_config=pa_config,
+                        )
+                    else:
+                        bp_new[inst], bv_new[inst] = pr, v
 
-        base_price = {}
-        for inst, dir in self.order_indicator["trade_dir"].items():
+                bp_new, bv_new = pd.Series(bp_new), pd.Series(bv_new)
+                bp_all.append(bp_new)
+                bv_all.append(bv_new)
+            bp_all = pd.concat(bp_all, axis=1)
+            bv_all = pd.concat(bv_all, axis=1)
 
-            if price == "deal_price":
-                price_s = trade_exchange.get_deal_price(inst, trade_start_time, trade_end_time, dir, method=None)
-            else:
-                raise NotImplementedError(f"This type of input is not supported")
+            self.order_indicator["base_volume"] = bv_all.sum(axis=1)
+            self.order_indicator["base_price"] = (bp_all * bv_all).sum(axis=1) / self.order_indicator["base_volume"]
 
-            # there are some zeros in the trading price. These cases are known meaningless
-            price_s = price_s.mask(np.isclose(price_s, 0))
+    def _agg_order_price_advantage(self):
+        if not self.order_indicator["trade_price"].empty:
+            self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1
+        else:
+            self.order_indicator["pa"] = pd.Series()
 
-            if agg == "vwap":
-                volume_s = trade_exchange.get_volume(inst, trade_start_time, trade_end_time, method=None)
-                base_price[inst] = ((price_s * volume_s).sum() / volume_s.sum()).item()
-            elif agg == "twap":
-                base_price[inst] = price_s.mean().item()
-
-        base_price = pd.Series(base_price)
-
-        # update PA
-        self.order_indicator["pa"] = self.order_indicator["trade_price"] / base_price - 1
+    def agg_order_indicators(
+        self,
+        inner_order_indicators: List[Dict[str, pd.Series]],
+        decision_list: List[Tuple[BaseTradeDecision, pd.Timestamp, pd.Timestamp]],
+        outer_trade_decision: BaseTradeDecision,
+        trade_exchange: Exchange,
+        indicator_config={},
+    ):
+        self._agg_order_trade_info(inner_order_indicators)
+        self._update_trade_amount(outer_trade_decision)
+        self._agg_order_fulfill_rate()
+        pa_config = indicator_config.get("pa_config", {})
+        self._agg_base_price(inner_order_indicators, decision_list, trade_exchange, pa_config=pa_config)
+        self._agg_order_price_advantage()
 
     def _cal_trade_fulfill_rate(self, method="mean"):
         if method == "mean":
@@ -402,7 +498,7 @@ class Indicator:
             raise ValueError(f"method {method} is not supported!")
 
     def _cal_trade_price_advantage(self, method="mean"):
-        pa_order = self.order_indicator["pa"] * (2 * (self.order_indicator["amount"] < 0).astype(int) - 1)
+        pa_order = self.order_indicator["pa"] * (1 - self.order_indicator["trade_dir"] * 2)
         if method == "mean":
             return pa_order.mean()
         elif method == "amount_weighted":
@@ -427,28 +523,6 @@ class Indicator:
     def _cal_trade_order_count(self):
         return self.order_indicator["amount"].count()
 
-    def update_order_indicators(self, trade_info: list):
-        self._update_order_trade_info(trade_info=trade_info)
-        self._update_order_fulfill_rate()
-        self._update_order_price_advantage()
-
-    def agg_order_indicators(
-        self,
-        trade_start_time,
-        trade_end_time,
-        inner_order_indicators: List[Dict[str, pd.Series]],
-        outer_trade_decision: BaseTradeDecision,
-        trade_exchange: Exchange,
-        indicator_config={},
-    ):
-        self._agg_order_trade_info(inner_order_indicators)
-        self._update_trade_amount(outer_trade_decision)
-        self._agg_order_fulfill_rate()
-        pa_config = indicator_config.get("pa_config", {})
-        self._agg_order_price_advantage(
-            inner_order_indicators, trade_start_time, trade_end_time, trade_exchange, pa_config=pa_config
-        )
-
     def cal_trade_indicators(self, trade_start_time, freq, indicator_config={}):
         show_indicator = indicator_config.get("show_indicator", False)
         ffr_config = indicator_config.get("ffr_config", {})
diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index 0ba607bdb..5c643df30 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -1,9 +1,14 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+from __future__ import annotations
+from typing import Union, TYPE_CHECKING, Tuple, Union, List, Set
+
+if TYPE_CHECKING:
+    from qlib.backtest.order import BaseTradeDecision
+    from qlib.strategy.base import BaseStrategy
 
 import pandas as pd
 import warnings
-from typing import Tuple, Union, List, Set
 
 from ..utils.resam import get_resam_calendar
 from ..data.data import Cal
@@ -30,17 +35,20 @@ class TradeCalendarManager:
             closed end of the trade time range, by default None
             If `end_time` is None, it must be reset before trading.
         """
-        self.freq = freq
-        self.start_time = pd.Timestamp(start_time) if start_time else None
-        self.end_time = pd.Timestamp(end_time) if end_time else None
-        self._init_trade_calendar(freq=freq, start_time=start_time, end_time=end_time)
+        self.reset(freq=freq, start_time=start_time, end_time=end_time)
 
-    def _init_trade_calendar(self, freq, start_time, end_time):
+    def reset(self, freq, start_time, end_time):
         """
+        Please refer to the docs of `__init__`
+
         Reset the trade calendar
         - self.trade_len : The total count for trading step
         - self.trade_step : The number of trading step finished, self.trade_step can be [0, 1, 2, ..., self.trade_len - 1]
         """
+        self.freq = freq
+        self.start_time = pd.Timestamp(start_time) if start_time else None
+        self.end_time = pd.Timestamp(end_time) if end_time else None
+
         _calendar, freq, freq_sam = get_resam_calendar(freq=freq)
         self._calendar = _calendar
         _, _, _start_index, _end_index = Cal.locate_index(start_time, end_time, freq=freq, freq_sam=freq_sam)
@@ -67,6 +75,7 @@ class TradeCalendarManager:
         return self.freq
 
     def get_trade_len(self):
+        """get the total step length"""
         return self.trade_len
 
     def get_trade_step(self):
@@ -99,6 +108,12 @@ class TradeCalendarManager:
         calendar_index = self.start_index + trade_step
         return self._calendar[calendar_index], self._calendar[calendar_index + 1] - pd.Timedelta(seconds=1)
 
+    def get_cur_step_time(self):
+        """
+        get current step time
+        """
+        return self.get_step_time(self.get_trade_step())
+
     def get_all_time(self):
         """Get the start_time and end_time for trading"""
         return self.start_time, self.end_time
@@ -146,5 +161,40 @@ class CommonInfrastructure(BaseInfrastructure):
 
 
 class LevelInfrastructure(BaseInfrastructure):
+    """level infrastructure is created by the executor, and then shared with strategies on the same level"""
+
     def get_support_infra(self):
-        return ["trade_calendar"]
+        return ["trade_calendar", "sub_level_infra"]
+
+    def reset_cal(self, freq, start_time, end_time):
+        """reset trade calendar manager"""
+        if self.has("trade_calendar"):
+            self.get("trade_calendar").reset(freq, start_time=start_time, end_time=end_time)
+        else:
+            self.reset_infra(trade_calendar=TradeCalendarManager(freq, start_time=start_time, end_time=end_time))
+
+    def set_sub_level_infra(self, sub_level_infra: LevelInfrastructure):
+        """this makes calendar access easier when crossing multiple levels"""
+        self.reset_infra(sub_level_infra=sub_level_infra)
+
+
+def get_start_end_idx(trade_calendar: TradeCalendarManager, outer_trade_decision: BaseTradeDecision) -> Union[int, int]:
+    """
+    A helper function for getting the decision-level index range limitation for inner strategy
+    - NOTE: this function is not applicable to order-level
+
+    Parameters
+    ----------
+    trade_calendar : TradeCalendarManager
+    outer_trade_decision : BaseTradeDecision
+        the trade decision made by outer strategy
+
+    Returns
+    -------
+    Union[int, int]:
+        start index and end index
+    """
+    try:
+        return outer_trade_decision.get_range_limit()
+    except NotImplementedError:
+        return 0, trade_calendar.get_trade_len() - 1
diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index 3ca325bf6..026afc8bb 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -14,29 +14,7 @@ from ...backtest.order import BaseTradeDecision, Order, TradeDecisionWO
 from ...backtest.exchange import Exchange, OrderHelper
 from ...backtest.utils import CommonInfrastructure, LevelInfrastructure
 from qlib.utils.file import get_io_object
-
-
-def get_start_end_idx(strategy: BaseStrategy, outer_trade_decision: BaseTradeDecision) -> Union[int, int]:
-    """
-    A helper function for getting the decision-level index range limitation for inner strategy
-    - NOTE: this function is not applicable to order-level
-
-    Parameters
-    ----------
-    strategy : BaseStrategy
-        the inner strawtegy
-    outer_trade_decision : BaseTradeDecision
-        the trade decision made by outer strategy
-
-    Returns
-    -------
-    Union[int, int]:
-        start index and end index
-    """
-    try:
-        return outer_trade_decision.get_range_limit()
-    except NotImplementedError:
-        return 0, strategy.trade_calendar.get_trade_len() - 1
+from qlib.backtest.utils import get_start_end_idx
 
 
 class TWAPStrategy(BaseStrategy):
@@ -105,7 +83,7 @@ class TWAPStrategy(BaseStrategy):
         # get the number of trading step finished, trade_step can be [0, 1, 2, ..., trade_len - 1]
         trade_step = self.trade_calendar.get_trade_step()
         # get the total count of trading step
-        start_idx, end_idx = get_start_end_idx(self, self.outer_trade_decision)
+        start_idx, end_idx = get_start_end_idx(self.trade_calendar, self.outer_trade_decision)
         trade_len = end_idx - start_idx + 1
 
         if trade_step < start_idx or trade_step > end_idx:
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index a787c098f..23d6b520a 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+from qlib.backtest.position import BasePosition
 from typing import List, Union
 
 from ..model.base import BaseModel
@@ -37,24 +38,26 @@ class BaseStrategy:
 
         self.reset(level_infra=level_infra, common_infra=common_infra, outer_trade_decision=outer_trade_decision)
 
+    @property
+    def trade_calendar(self) -> TradeCalendarManager:
+        return self.level_infra.get("trade_calendar")
+
+    @property
+    def trade_position(self) -> BasePosition:
+        return self.common_infra.get("trade_account").current
+
     def reset_level_infra(self, level_infra: LevelInfrastructure):
         if not hasattr(self, "level_infra"):
             self.level_infra = level_infra
         else:
             self.level_infra.update(level_infra)
 
-        if level_infra.has("trade_calendar"):
-            self.trade_calendar: TradeCalendarManager = level_infra.get("trade_calendar")
-
     def reset_common_infra(self, common_infra: CommonInfrastructure):
         if not hasattr(self, "common_infra"):
             self.common_infra: CommonInfrastructure = common_infra
         else:
             self.common_infra.update(common_infra)
 
-        if common_infra.has("trade_account"):
-            self.trade_position = common_infra.get("trade_account").current
-
     def reset(
         self,
         level_infra: LevelInfrastructure = None,

From 155019ba353bcd7d6758dd23914698f2c34395d8 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Fri, 9 Jul 2021 10:33:41 +0000
Subject: [PATCH 24/28] move the pa sign from last step to first

---
 qlib/backtest/report.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index 138a44faa..8a49af490 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -466,7 +466,10 @@ class Indicator:
 
     def _agg_order_price_advantage(self):
         if not self.order_indicator["trade_price"].empty:
-            self.order_indicator["pa"] = self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1
+            sign = 1 - self.order_indicator["trade_dir"] * 2
+            self.order_indicator["pa"] = sign * (
+                self.order_indicator["trade_price"] / self.order_indicator["base_price"] - 1
+            )
         else:
             self.order_indicator["pa"] = pd.Series()
 
@@ -498,7 +501,11 @@ class Indicator:
             raise ValueError(f"method {method} is not supported!")
 
     def _cal_trade_price_advantage(self, method="mean"):
-        pa_order = self.order_indicator["pa"] * (1 - self.order_indicator["trade_dir"] * 2)
+        pa_order = self.order_indicator["pa"]
+        if isinstance(pa_order, (int, float)):
+            # pa from atomic executor
+            return pa_order
+
         if method == "mean":
             return pa_order.mean()
         elif method == "amount_weighted":
@@ -511,7 +518,10 @@ class Indicator:
             raise ValueError(f"method {method} is not supported!")
 
     def _cal_trade_positive_rate(self):
-        pa_order = self.order_indicator["pa"] * (2 * (self.order_indicator["amount"] < 0).astype(int) - 1)
+        pa_order = self.order_indicator["pa"]
+        if isinstance(pa_order, (int, float)):
+            # pa from atomic executor
+            return pa_order
         return (pa_order > 0).astype(int).sum() / pa_order.count()
 
     def _cal_trade_amount(self):

From c29e5b262191557a3a3d08ef68a8a80a3a28973b Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Mon, 12 Jul 2021 13:50:13 +0000
Subject: [PATCH 25/28] Fix circular import

---
 qlib/strategy/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/qlib/strategy/__init__.py b/qlib/strategy/__init__.py
index e3fcd8e26..59e481eb9 100644
--- a/qlib/strategy/__init__.py
+++ b/qlib/strategy/__init__.py
@@ -1,4 +1,2 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-
-from .base import *

From 9bf8c999e67520a45ad4bf1b0351ec311debb2ec Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 20 Jul 2021 06:14:40 +0000
Subject: [PATCH 26/28] type checking update

---
 qlib/strategy/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index fa21fae5f..7a267b511 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -1,6 +1,9 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-from qlib.backtest.exchange import Exchange
+from __future__ import annotations
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from qlib.backtest.exchange import Exchange
 from qlib.backtest.position import BasePosition
 from typing import List, Tuple, Union
 

From bdebe12cf29ad7b7cad3b261e6a603a579f2d458 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Mon, 26 Jul 2021 06:14:57 +0000
Subject: [PATCH 27/28] support empty benchmark

Empty benchmark could accelerate the learning process
---
 qlib/backtest/__init__.py              |  3 ++-
 qlib/backtest/account.py               | 21 +++++++++--------
 qlib/backtest/exchange.py              | 20 ++++++++--------
 qlib/backtest/report.py                |  5 ++--
 qlib/contrib/strategy/rule_strategy.py | 32 ++++++++++++++------------
 qlib/strategy/base.py                  |  3 ++-
 6 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index 19dbe87ce..dbfbd4a0e 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -8,9 +8,9 @@ from .account import Account
 
 if TYPE_CHECKING:
     from ..strategy.base import BaseStrategy
+    from .executor import BaseExecutor
 from .position import Position
 from .exchange import Exchange
-from .executor import BaseExecutor
 from .backtest import backtest_loop
 from .backtest import collect_data_loop
 from .order import Order
@@ -155,6 +155,7 @@ def get_strategy_executor(
     # - for avoiding recursive import
     # - typing annotations is not reliable
     from ..strategy.base import BaseStrategy
+    from .executor import BaseExecutor
 
     trade_account = create_account_instance(
         start_time=start_time, end_time=end_time, benchmark=benchmark, account=account, pos_type=pos_type
diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index 806f88a96..9b9a25c23 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -75,17 +75,7 @@ class Account:
     ):
         self._pos_type = pos_type
         self._port_metr_enabled = port_metr_enabled
-        self.init_vars(init_cash, position_dict, freq, benchmark_config)
 
-    def is_port_metr_enabled(self):
-        """
-        Is portfolio-based metrics enabled.
-        """
-        return self._port_metr_enabled and not self.current.skip_update()
-
-    def init_vars(self, init_cash, position_dict, freq: str, benchmark_config: dict):
-
-        # init cash
         self.init_cash = init_cash
         self.current: BasePosition = init_instance_by_config(
             {
@@ -100,8 +90,19 @@ class Account:
         self.accum_info = AccumulatedInfo()
         self.report = None
         self.positions = {}
+
+        # set these explicitly in case `reset` ignores None values
+        self.benchmark_config = benchmark_config
+        self.freq = freq
+
         self.reset(freq=freq, benchmark_config=benchmark_config, init_report=True)
 
+    def is_port_metr_enabled(self):
+        """
+        Is portfolio-based metrics enabled.
+        """
+        return self._port_metr_enabled and not self.current.skip_update()
+
     def reset_report(self, freq, benchmark_config):
         # portfolio related metrics
         if self.is_port_metr_enabled():
diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py
index a22754885..ea1d012eb 100644
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -512,7 +512,7 @@ class Exchange:
     def _get_factor_or_raise_erorr(self, factor: float = None, stock_id: str = None, start_time=None, end_time=None):
         """Please refer to the docs of get_amount_of_trade_unit"""
         if factor is None:
-            if stock_id is not None and start_time is not None  and end_time is not None :
+            if stock_id is not None and start_time is not None and end_time is not None:
                 factor = self.get_factor(stock_id=stock_id, start_time=start_time, end_time=end_time)
             else:
                 raise ValueError(f"`factor` and (`stock_id`, `start_time`, `end_time`) can't both be None")
@@ -537,15 +537,16 @@ class Exchange:
             the end time of trading range
         """
         if not self.trade_w_adj_price and self.trade_unit is not None:
-            factor = self._get_factor_or_raise_erorr(factor=factor,
-                                                     stock_id=stock_id,
-                                                     start_time=start_time,
-                                                     end_time=end_time)
+            factor = self._get_factor_or_raise_erorr(
+                factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time
+            )
             return self.trade_unit / factor
         else:
             return None
 
-    def round_amount_by_trade_unit(self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None):
+    def round_amount_by_trade_unit(
+        self, deal_amount, factor: float = None, stock_id: str = None, start_time=None, end_time=None
+    ):
         """Parameter
         Please refer to the docs of get_amount_of_trade_unit
 
@@ -555,10 +556,9 @@ class Exchange:
         """
         if not self.trade_w_adj_price and self.trade_unit is not None:
             # the minimal amount is 1. Add 0.1 for solving precision problem.
-            factor = self._get_factor_or_raise_erorr(factor=factor,
-                                                     stock_id=stock_id,
-                                                     start_time=start_time,
-                                                     end_time=end_time)
+            factor = self._get_factor_or_raise_erorr(
+                factor=factor, stock_id=stock_id, start_time=start_time, end_time=end_time
+            )
             return (deal_amount * factor + 0.1) // self.trade_unit * self.trade_unit / factor
         return deal_amount
 
diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index 6b64bf3b1..84cae2568 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -80,11 +80,12 @@ class Report:
     def init_bench(self, freq=None, benchmark_config=None):
         if freq is not None:
             self.freq = freq
-        if benchmark_config is not None:
-            self.benchmark_config = benchmark_config
+        self.benchmark_config = benchmark_config
         self.bench = self._cal_benchmark(self.benchmark_config, self.freq)
 
     def _cal_benchmark(self, benchmark_config, freq):
+        if benchmark_config is None:
+            return None
         benchmark = benchmark_config.get("benchmark", CSI300_BENCH)
         if benchmark is None:
             return None
diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index 1ec054e45..b42c4f578 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -63,9 +63,9 @@ class TWAPStrategy(BaseStrategy):
                 stock_id=order.stock_id, start_time=trade_start_time, end_time=trade_end_time
             ):
                 continue
-            _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id,
-                                                                              start_time=order.start_time,
-                                                                              end_time=order.end_time)
+            _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(
+                stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time
+            )
             _order_amount = None
             # considering trade unit
             if _amount_trade_unit is None:
@@ -169,9 +169,9 @@ class SBBStrategyBase(BaseStrategy):
                     self.trade_trend[order.stock_id] = _pred_trend
                 continue
             # get amount of one trade unit
-            _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id,
-                                                                              start_time=order.start_time,
-                                                                              end_time=order.end_time)
+            _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(
+                stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time
+            )
             if _pred_trend == self.TREND_MID:
                 _order_amount = None
                 # considering trade unit
@@ -471,9 +471,9 @@ class ACStrategy(BaseStrategy):
 
             if sig_sam is None or np.isnan(sig_sam):
                 # no signal, TWAP
-                _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(stock_id=order.stock_id,
-                                                                                  start_time=order.start_time,
-                                                                                  end_time=order.end_time)
+                _amount_trade_unit = self.trade_exchange.get_amount_of_trade_unit(
+                    stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time
+                )
                 if _amount_trade_unit is None:
                     # divide the order into equal parts, and trade one part
                     _order_amount = self.trade_amount[order.stock_id] / (trade_len - trade_step)
@@ -494,10 +494,9 @@ class ACStrategy(BaseStrategy):
                     np.sinh(kappa * (trade_len - trade_step)) - np.sinh(kappa * (trade_len - trade_step - 1))
                 ) / np.sinh(kappa * trade_len)
                 _order_amount = order.amount * amount_ratio
-                _order_amount = self.trade_exchange.round_amount_by_trade_unit(_order_amount,
-                                                                               stock_id=order.stock_id,
-                                                                               start_time=order.start_time,
-                                                                               end_time=order.end_time)
+                _order_amount = self.trade_exchange.round_amount_by_trade_unit(
+                    _order_amount, stock_id=order.stock_id, start_time=order.start_time, end_time=order.end_time
+                )
 
             if order.direction == order.SELL:
                 # sell all amount at last
@@ -584,8 +583,11 @@ class FileOrderStrategy(BaseStrategy):
     """
 
     def __init__(
-        self, file: Union[IO, str, Path, pd.DataFrame],
-        trade_range: Union[Tuple[int, int], TradeRange] = None, *args, **kwargs
+        self,
+        file: Union[IO, str, Path, pd.DataFrame],
+        trade_range: Union[Tuple[int, int], TradeRange] = None,
+        *args,
+        **kwargs,
     ):
         """
 
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 7a267b511..c47d2494f 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -2,9 +2,10 @@
 # Licensed under the MIT License.
 from __future__ import annotations
 from typing import TYPE_CHECKING
+
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
-from qlib.backtest.position import BasePosition
+    from qlib.backtest.position import BasePosition
 from typing import List, Tuple, Union
 
 from ..model.base import BaseModel

From e817413769c648a7cd6e9a902f9de568b3c08a5c Mon Sep 17 00:00:00 2001
From: v-mingzhehan <v-mingzhehan@microsoft.com>
Date: Tue, 27 Jul 2021 14:52:29 +0000
Subject: [PATCH 28/28] Restore examples

---
 .../nested_decision_execution/assets/orders   | Bin 3464 -> 0 bytes
 .../requirements.txt                          |   2 -
 .../nested_decision_execution/rl_dummy.py     | 586 ------------------
 .../nested_decision_execution/workflow.py     |  11 +-
 4 files changed, 2 insertions(+), 597 deletions(-)
 delete mode 100644 examples/nested_decision_execution/assets/orders
 delete mode 100644 examples/nested_decision_execution/requirements.txt
 delete mode 100644 examples/nested_decision_execution/rl_dummy.py

diff --git a/examples/nested_decision_execution/assets/orders b/examples/nested_decision_execution/assets/orders
deleted file mode 100644
index 7902b901c000bfd82fb7fcc0386c588f3f78cbb4..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3464
zcmai$eM}Q~7{^;->?j+k)cJzoL`)bn&F}ioCT}CMX-(Nc=HkTe`a-UQzU+G4Bus`Q
zI>BQSJ%TO}L`B0UZjDnjmStp_EHR5j&`k+(i2|Dw#>|9GmWBPU9rq*4-LLmYFYP_~
z-1m8&@9%k97u&MuNk#Z7=QFwFx2oKBjh%8-vaSHD@i9&p!*h=nhwn%DXZG@YU=$Hx
zeU3_-+sSi8=}SfcOhCtTag@gw^s+p+%p3Iht2GWE<I;G7Rjiw07^)fu1RPWq#qdWU
zF&qN#PPW?b5`I}!VdfhSe889RnU)w%xskV8ty|{c75=a&Sl(Vpv=+TQSY7`hpWhGT
zxCEz*s<Qp}R*QmD1z3t<ZLHhl;<+?G<#kd4jSVIg(6E8X4L*WF9)E!6;<wFzfv;~Q
z6pEE!zVV<{ZM6y-IKt(1Mo4rDd}utg+rRvy`G@gNH{)`!Zl9O8D7m-_%E9^=o=%_>
zVH$dr2E9}wiP)^8GkM_zf_6G4Qc*e%7IFswTD`%@(*z_fDI|2)$?me_$pzt9dbveG
zub@-uH2P_Jtwl+v!=Uwrv709+a%EvAg9!UZb-%Y|U<n@f>HDQ}%7Dpe7Gb78?v?ND
zOvJ;c2G2?=6Z~Qzvqjjb>Wh7UcMV5+Dwk$~(<%vrH3Pt?6Je)8NAJDdB)qbph&f3Z
zR7L`Tl>J^@(iBX}!o$vOJcNOvZZ?uRz2~V7cv$<eG)%At0H^_%dnZRc7oNbwI(B|7
zrwka4K&;uN8m1`p`4qkZwTB1ftf4gk1iMijden6?7q4uf_GFHXX@OCTy7}jwQ*EQ2
zN<3`Cu2quC1fKyNs##An&HUW47!UiZElILwLIZR_FV^gB@31@WY&ssclX+e?&1{oN
zglU?_Girub;b9pnpQJLuv_>N`ZODF0d10vz53|RWOTvWXWLR*}S6m-_MO`qBw?_Vz
zOvwfaHPdTR&GPqMiNE!58D3eX@~EUTAx)hQS%Yc(Zt9o#=kT!BHPTZ<u!c4pHQ>$G
zyve?R77v@K@XIPQk;rE=LqiGwG)HMbd#Oea21ql4)sN1mzA(KGudM4wP7)?GK&vN3
zpB-%P?w@Rk#lub{zbe^Hp=M?if;GDQ>ANoK@vzksJ+jK+zXnm+{qJwi7HzqS_u2N2
z?U8{0?M*?M&Wx<__>JOEE~Uiam2G|PUCA_e=-?4BqrI6j=WQ+x#p7X*KD#TKrl5?>
zMmylzdy8|`Em7uK_o75;Wx{bHkuvqQnz8(kpTaA9N7X5>%z(zWZ+e<IkQGH){?wE#
z%%Dfzyn13yUmo!-zMFMd(<0I(x(Q5ZYK#>mj5i%p<J0`>_1m(_;NTX0w)?lfma&mM
zJgkmgFAFm**a5@A(LP&wl!fx_;d;4l0>gssba_1WTw7=aU$fym6_POMGq?lL)OfK!
zcs@f#;?qp&|4=qfok=U!?D*>1*`lPCcvwS^^mYpGO%nNxFPkXaoCNT&+GB*IvhX|u
zNLk6u!L7u)O?cSd99D7(3pInIR!q~dZQOS8Oq80b_n(qe28RZM{b0Er^8OlS1gRdL
Jo`<<K{{rMQg4X~5

diff --git a/examples/nested_decision_execution/requirements.txt b/examples/nested_decision_execution/requirements.txt
deleted file mode 100644
index 2ad0a826f..000000000
--- a/examples/nested_decision_execution/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-tianshou>=0.4.1
-torch>=1.8.0
diff --git a/examples/nested_decision_execution/rl_dummy.py b/examples/nested_decision_execution/rl_dummy.py
deleted file mode 100644
index c42e28be4..000000000
--- a/examples/nested_decision_execution/rl_dummy.py
+++ /dev/null
@@ -1,586 +0,0 @@
-import pickle
-from collections import OrderedDict, defaultdict
-from dataclasses import dataclass, asdict
-from pprint import pprint
-from typing import Iterable, Any, Optional, OrderedDict, Tuple, Dict, List
-
-import fire
-import gym
-import numpy as np
-import pandas as pd
-import qlib
-from gym import spaces
-from qlib.backtest import get_exchange, Account, BaseExecutor, CommonInfrastructure, Order, TradeCalendarManager, backtest_func
-from qlib.backtest.executor import NestedExecutor, SimulatorExecutor
-from qlib.config import REG_CN
-from qlib.data import D
-from qlib.rl.interpreter import StateInterpreter, ActionInterpreter
-from qlib.strategy import BaseStrategy
-from qlib.tests.data import GetData
-from qlib.utils import init_instance_by_config, exists_qlib_data
-from torch.utils.data import Dataset, DataLoader
-from tianshou.data import Batch, Collector
-from tianshou.env import DummyVectorEnv, SubprocVectorEnv
-from tianshou.policy import BasePolicy
-
-from workflow import NestedDecisonExecutionWorkflow
-
-
-MAX_STEPS = 10
-
-
-def get_executor(start_time, end_time, executor, exchange, benchmark="SH000300", account=1e9) -> BaseExecutor:
-    trade_account = Account(
-        init_cash=account,
-        benchmark_config={
-            "benchmark": benchmark,
-            "start_time": start_time,
-            "end_time": end_time,
-        },
-    )
-
-    common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
-    trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
-
-    return trade_executor
-
-
-def price_advantage(exec_price: float, baseline_price: float, direction: int) -> float:
-    if baseline_price == 0:
-        return 0.
-    if direction == 1:
-        return (1 - exec_price / baseline_price) * 10000
-    else:
-        return (exec_price / baseline_price - 1) * 10000
-
-
-@dataclass
-class EpisodicState:
-    """
-    A simplified data structure as the input of RL-related components to calculate observations and rewards.
-    Some of the metrics info are calculated on-the-fly in this class.
-    """
-    # requirements
-    stock_id: int
-    start_time: pd.Timestamp
-    end_time: pd.Timestamp
-    direction: int
-    target: float
-    num_step: int
-
-    # simplified market data used to calculate backtest metrics
-    # this may contains information from future so be careful
-    market_price: np.ndarray
-    market_vol: np.ndarray
-
-    # agent state
-    cur_time: Optional[pd.Timestamp] = None
-    cur_step: int = 0
-    cur_tick: int = 0  # tick is the most fine-grained time unit (typically minute)
-    done: bool = False
-    position: Optional[float] = None
-    exec_vol: Optional[np.ndarray] = None
-    last_step_duration: Optional[int] = None
-    position_history: Optional[np.ndarray] = None
-
-    # calculated statistics
-    turnover: Optional[float] = None
-    baseline_twap: Optional[float] = None
-    baseline_vwap: Optional[float] = None
-    exec_avg_price: Optional[float] = None
-    pa_twap: Optional[float] = None
-    pa_vwap: Optional[float] = None
-    fulfill_rate: Optional[float] = None
-
-    def __post_init__(self):
-        assert self.target >= 0
-        assert len(self.market_price) == len(self.market_vol)
-        self.cur_time = self.start_time
-        self.position = self.target
-        self.position_history = np.full((self.num_step + 1), np.nan)
-        self.position_history[0] = self.position
-        self.baseline_twap = np.mean(self.market_price)
-        if self.market_vol.sum() == 0:
-            self.baseline_vwap = np.mean(self.market_price)
-        else:
-            self.baseline_vwap = np.average(self.market_price, weights=self.market_vol)
-
-    def update_stats(self):
-        market_price = self.market_price[:len(self.exec_vol)]
-        self.turnover = (self.exec_vol * market_price).sum()
-        # exec_vol can be zero
-        if np.isclose(self.exec_vol.sum(), 0):
-            self.exec_avg_price = market_price[0]
-        else:
-            self.exec_avg_price = np.average(market_price, weights=self.exec_vol)
-        self.pa_twap = price_advantage(self.exec_avg_price, self.baseline_twap, self.direction)
-        self.pa_vwap = price_advantage(self.exec_avg_price, self.baseline_vwap, self.direction)
-        self.fulfill_rate = (self.target - self.position) / self.target
-        if abs(self.fulfill_rate - 1.0) < 1e-5:
-            self.fulfill_rate = 1.0
-        self.fulfill_rate *= 100
-
-    def logs(self):
-        logs = {
-            'stop_time': self.cur_time - self.start_time,
-            'stop_step': self.cur_step,
-            'turnover': self.turnover,
-            'baseline_twap': self.baseline_twap,
-            'baseline_vwap': self.baseline_vwap,
-            'exec_avg_price': self.exec_avg_price,
-            'pa_twap': self.pa_twap,
-            'pa_vwap': self.pa_vwap,
-            'ffr': self.fulfill_rate
-        }
-        return logs
-
-    @classmethod
-    def from_order_and_executor(cls, order: Order, calendar: TradeCalendarManager, frequency: str) -> "EpisodicState":
-        # Synchronous state for executor to EpisodicState
-        state = cls(
-            stock_id=order.stock_id,
-            start_time=order.start_time,
-            end_time=order.end_time,
-            direction=order.direction,
-            target=order.amount,
-            num_step=calendar.get_trade_len(),
-            market_price=_retrieve_backtest_data(order, '$close', frequency),
-            market_vol=_retrieve_backtest_data(order, '$volume', frequency),
-        )
-        state.cur_step = calendar.get_trade_step()
-        assert state.cur_step == 0
-        state.cur_time, _ = calendar.get_step_time(state.cur_step)
-        return state
-
-    def update(self, execute_result: List[Order], calendar: TradeCalendarManager,
-               done: Optional[bool] = None, length: Optional[int] = None) -> "StepState":
-        if length is not None:
-            exec_vol = np.zeros(length)
-            exec_vol[:len(execute_result)] = np.array([order.deal_amount for order, _, __, ___ in execute_result])
-        else:
-            exec_vol = np.array([order.deal_amount for order, _, __, ___ in execute_result])
-        # Synchronous exec_vol to executor and synchronous back to EpisodicState
-        cur_tick = self.cur_tick
-        ticks_this_step = len(exec_vol)
-        self.cur_step = trade_step = calendar.get_trade_step()
-        self.cur_tick += ticks_this_step
-        self.position -= np.sum(exec_vol)
-        self.position_history[trade_step] = self.position
-        if done is not None:
-            self.done = done
-        else:
-            self.done = self.position < 1e-5
-        self.exec_vol = exec_vol if self.exec_vol is None else \
-            np.concatenate((self.exec_vol, exec_vol))
-
-        if self.done:
-            self.update_stats()
-        else:
-            self.cur_time, _ = calendar.get_step_time(trade_step)
-
-        l, r = cur_tick, cur_tick + ticks_this_step
-        assert 0 <= l < r
-        return StepState(exec_vol, self.market_vol[l:r], self.market_price[l:r], self)
-
-
-@dataclass
-class StepState:
-    # market info and execution volume for current step
-    exec_vol: np.ndarray
-    market_vol: np.ndarray
-    market_price: np.ndarray
-
-    # episode info
-    episode_state: EpisodicState
-
-    # calculated statistics
-    turnover: Optional[float] = None
-    exec_avg_price: Optional[float] = None
-    pa_twap: Optional[float] = None
-    pa_vwap: Optional[float] = None
-
-    def __post_init__(self):
-        assert len(self.exec_vol) == len(self.market_price) == len(self.market_vol)
-        self.turnover = (self.exec_vol * self.market_price).sum()
-        if np.isclose(self.market_vol.sum(), 0):
-            self.exec_avg_price = self.market_price[0]
-        else:
-            self.exec_avg_price = np.average(self.market_price, weights=self.market_vol)
-        self.pa_twap = price_advantage(self.exec_avg_price, self.episode_state.baseline_twap,
-                                       self.episode_state.direction)
-        self.pa_vwap = price_advantage(self.exec_avg_price, self.episode_state.baseline_vwap,
-                                       self.episode_state.direction)
-
-
-def _retrieve_backtest_data(order: Order, field: str, frequency: str) -> np.ndarray:
-    # Retrieve backtest data for RL-specific use (including reward calculation)
-    return D.features(
-        [order.stock_id],
-        ['$open', '$close', '$high', '$low', '$volume'],
-        start_time=order.start_time,
-        end_time=order.end_time,
-        freq=frequency
-    )[field].to_numpy()
-
-
-def create_sub_order(exec_vol: float, calendar: TradeCalendarManager, original_order: Order) -> Order:
-    # Convert a real number to an order
-    trade_step = calendar.get_trade_step()
-    trade_start_time, trade_end_time = calendar.get_step_time(trade_step)
-    order_kwargs = asdict(original_order)
-    order_kwargs.update(start_time=trade_start_time, end_time=trade_end_time, amount=exec_vol)
-    trade_decision = Order(**order_kwargs)
-    return trade_decision
-
-
-class SingleOrderEnv(gym.Env):
-    def __init__(self,
-                 observation: StateInterpreter,
-                 action: ActionInterpreter,
-                 reward: Any,
-                 dataloader: Iterable,
-                 executor: BaseExecutor):
-        self.action = action
-        self.observation = observation
-        self.reward = reward
-        self.dataloader = dataloader
-        self.executor = executor
-
-        self.inner_frequency = self.executor.get_all_executor()[-1].time_per_step
-
-    @property
-    def action_space(self):
-        return self.action.action_space
-
-    @property
-    def observation_space(self):
-        return self.observation.observation_space
-
-    def reset(self):
-        try:
-            self.cur_order = next(self.dataloader)
-        except StopIteration:
-            self.dataloader = None
-            return None
-
-        self.execute_result = []
-        self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time)
-        self.ep_state = EpisodicState.from_order_and_executor(
-            self.cur_order, self.executor.trade_calendar, self.inner_frequency
-        )
-
-        self.action_history = np.full(self.ep_state.num_step, np.nan)
-        return self.observation(self.ep_state)
-
-    def step(self, action):
-        assert self.dataloader is not None
-        assert not self.executor.finished()
-        self.action_history[self.ep_state.cur_step] = action
-
-        exec_vol = self.action(action, self.ep_state)
-        trade_decision = create_sub_order(exec_vol, self.executor.trade_calendar, self.cur_order)
-        execute_result = self.executor.execute([trade_decision])
-        step_state = self.ep_state.update(execute_result, self.executor.trade_calendar)
-        if self.executor.finished():
-            assert self.ep_state.done
-
-        reward, rew_info = self.reward(self.ep_state, step_state)
-
-        info = {
-            'action_history': self.action_history,
-            'category': self.ep_state.direction,
-            'reward': rew_info
-        }
-        if self.ep_state.done:
-            info['logs'] = self.ep_state.logs()
-            info['index'] = {
-                'ins': self.ep_state.stock_id,
-                'date': self.ep_state.start_time,
-            }
-            # TODO: collect logs
-            pprint(info)
-
-        return self.observation(self.ep_state), reward, self.ep_state.done, info
-
-
-class RLStrategy(BaseStrategy):
-    """When inference and do the backtest from end to end, use this strategy."""
-
-    def __init__(
-        self,
-        observation: "Observation",
-        action: "Action",
-        policy: BasePolicy,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.observation = observation
-        self.action = action
-        self.policy = policy
-
-        # TODO: how to get inner frequency and trade len
-        # This should be no longer required when PA is provided by qlib.
-        self.inner_frequency = "day"
-        self.inner_trade_len = 1
-
-    def reset(self, outer_trade_decision: List[Order] = None, **kwargs):
-        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
-        if outer_trade_decision is not None:
-            self.states = OrderedDict()  # explicitly make it ordered
-            for order in outer_trade_decision:
-                state = EpisodicState.from_order_and_executor(order, self.trade_calendar, "day")
-                self.states[order.stock_id, order.direction] = state
-
-    def generate_trade_decision(self, execute_result=None):
-        # apply results from the last step
-        if execute_result is not None:
-            orders = defaultdict(list)
-            for e in execute_result:
-                orders[e[0].stock_id, e[0].direction].append(e)
-            for (stock_id, direction), state in self.states.items():
-                state.update(orders[stock_id, direction], self.trade_calendar, length=self.inner_trade_len)
-
-        if not self.states:
-            return []
-
-        obs_batch = Batch([{"obs": self.observation(state)} for state in self.states.values()])
-        act = self.policy(obs_batch)
-        exec_vols = [self.action(a, s) for a, s in zip(act.act, self.states.values())]
-        return [create_sub_order(v, self.trade_calendar, o) for v, o in zip(exec_vols, self.outer_trade_decision)]
-
-
-class RlWorkflow(NestedDecisonExecutionWorkflow):
-
-    def tianshou(self):
-        self._init_qlib()
-
-        # TODO: why is there a benchmark?
-        trade_start_time = "2017-01-01"
-        trade_end_time = "2020-08-01"
-        benchmark = "SH000300"
-        time_per_step = "day"
-        executor_config = {
-            "class": "SimulatorExecutor",
-            "module_path": "qlib.backtest.executor",
-            "kwargs": {
-                "time_per_step": time_per_step,
-                "verbose": True,
-                "generate_report": False,
-            }
-        }
-        exchange = get_exchange(
-            freq="day",
-            limit_threshold=0.095,
-            deal_price="close",
-            open_cost=0.0005,
-            close_cost=0.0015,
-            min_cost=5
-        )
-
-        observation = Observation(time_per_step)
-        action = Action()
-        reward_fn = Reward()
-
-        def dummy_env():
-            executor = get_executor(
-                trade_start_time,
-                trade_end_time,
-                executor_config,
-                exchange,
-                benchmark,
-                1000000000,
-            )
-            return SingleOrderEnv(
-                observation, action, reward_fn,
-                iter(DataLoader(QlibOrderDataset('assets/orders'), batch_size=None, shuffle=True)), executor)
-
-        policy = DummyPolicy()
-
-        # This can not be replaced with SubprocVectorEnv
-        # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor
-        # p = Pool(processes=workers)
-        # AssertionError: daemonic processes are not allowed to have children
-        envs = DummyVectorEnv([dummy_env for _ in range(4)])
-        test_collector = Collector(policy, envs)
-        policy.eval()
-        # TODO: create a queue for all orders and make it auto-complete when all the orders are processed
-        test_collector.collect(n_episode=10)
-
-    def rl_day(self, load_model: Optional[str] = None):
-        self._init_qlib()
-        model = init_instance_by_config(self.task["model"])
-        dataset = init_instance_by_config(self.task["dataset"])
-        if load_model is None:
-            self._train_model(model, dataset)
-        else:
-            model = self._load_model(load_model)
-        trade_start_time = "2017-01-01"
-        trade_end_time = "2020-08-01"
-        trade_account = Account(
-            init_cash=int(1e9),
-            benchmark_config={
-                "benchmark": "SH000300",
-                "start_time": trade_start_time,
-                "end_time": trade_end_time,
-            },
-        )
-        exchange = get_exchange(
-            freq="day",
-            limit_threshold=0.095,
-            deal_price="close",
-            open_cost=0.0005,
-            close_cost=0.0015,
-            min_cost=5
-        )
-        common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
-        executor = NestedExecutor(
-            time_per_step="week",
-            inner_executor=SimulatorExecutor(time_per_step="day", verbose=True),
-            inner_strategy=RLStrategy(Observation("day"), Action(), DummyPolicy()),
-            common_infra=common_infra
-        )
-        strategy = init_instance_by_config({
-            "class": "TopkDropoutStrategy",
-            "module_path": "qlib.contrib.strategy.model_strategy",
-            "kwargs": {
-                "model": model,
-                "dataset": dataset,
-                "topk": 50,
-                "n_drop": 5,
-            },
-        }, common_infra=common_infra)
-        report_dict = backtest_func(trade_start_time, trade_end_time, strategy, executor)
-        print(report_dict)
-
-
-### This is a full RL strategy ###
-
-
-class QlibOrderDataset(Dataset):
-    def __init__(self, order_file):
-        with open(order_file, 'rb') as f:
-            self.orders = pickle.load(f)
-
-    def __len__(self):
-        return len(self.orders)
-
-    def __getitem__(self, index) -> Order:
-        return self.orders[index]
-
-
-class DummyPolicy(BasePolicy):
-    def forward(self, batch, state=None, **kwargs):
-        return Batch(act=np.random.randint(0, 5, size=(len(batch), )))
-
-    def learn(self, *args, **kwargs):
-        pass
-
-
-class Observation:
-    def __init__(self, time_per_step):
-        self.time_per_step = time_per_step
-
-    def __call__(self, ep_state: EpisodicState) -> Any:
-        obs = self.observe(ep_state)
-        if not self.validate(obs):
-            raise ValueError(f'Observation space does not contain obs. Space: {self.observation_space} Sample: {obs}')
-        return obs
-
-    def validate(self, obs: Any) -> bool:
-        return self.observation_space.contains(obs)
-
-    @property
-    def observation_space(self):
-        space = {
-            'direction': spaces.Discrete(2),
-            'cur_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32),
-            'num_step': spaces.Box(0, MAX_STEPS, shape=(), dtype=np.int32),
-            'target': spaces.Box(-1e-5, np.inf, shape=()),
-            'position': spaces.Box(-1e-5, np.inf, shape=()),
-            'features': spaces.Box(-np.inf, np.inf, shape=(5, ))
-        }
-        return spaces.Dict(space)
-
-    def observe(self, ep_state: EpisodicState) -> Any:
-        features = D.features(
-            [ep_state.stock_id],
-            ['$open', '$close', '$high', '$low', '$volume'],
-            start_time=ep_state.start_time,
-            end_time=ep_state.end_time,
-            freq=self.time_per_step
-        ).loc[(ep_state.stock_id, ep_state.cur_time)].to_numpy()
-        features = np.nan_to_num(features)
-        return {
-            'direction': _to_int32(ep_state.direction),
-            'cur_step': _to_int32(min(ep_state.cur_step, ep_state.num_step - 1)),
-            'num_step': _to_int32(ep_state.num_step),
-            'target': _to_float32(ep_state.target),
-            'position': _to_float32(ep_state.position),
-            'features': features,
-        }
-
-
-class Action:
-    denominator = 4
-
-    @property
-    def action_space(self):
-        return spaces.Discrete(self.denominator + 1)
-
-    def __call__(self, action: Any, ep_state: EpisodicState) -> Any:
-        if not self.validate(action):
-            raise ValueError(f'Action space does not contain action. Space: {self.action_space} Sample: {action}')
-        act_ = self.to_volume(action, ep_state)
-        return act_
-
-    def validate(self, action: Any) -> bool:
-        return self.action_space.contains(action)
-
-    def to_volume(self, action: Any, ep_state: EpisodicState) -> Any:
-        exec_vol = ep_state.position / self.denominator * action
-        if ep_state.cur_step + 1 >= ep_state.num_step:
-            exec_vol = ep_state.position
-        # TODO: might need to check whether the stock is tradable or whether it satisfies trade unit?
-        return exec_vol
-
-
-class Reward:
-    weight = 1.0
-
-    def __call__(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
-        rew, info = 0., {}
-        if ep_state.done:
-            ep_rew, ep_info = self._to_tuple(self.episode_end(ep_state))
-            rew += ep_rew
-            info.update({f'ep/{k}': v for k, v in ep_info.items()})
-        st_rew, st_info = self._to_tuple(self.step_end(ep_state, st_state))
-        rew += st_rew
-        info.update({f'st/{k}': v for k, v in st_info.items()})
-        return rew * self.weight, info
-
-    @staticmethod
-    def _to_tuple(x):
-        if isinstance(x, tuple):
-            return x
-        return x, {}
-
-    def episode_end(self, ep_state: EpisodicState) -> Tuple[float, Dict[str, float]]:
-        return 0.
-
-    def step_end(self, ep_state: EpisodicState, st_state: StepState) -> Tuple[float, Dict[str, float]]:
-        assert ep_state.target > 0
-        baseline_price = st_state.pa_twap
-        pa = baseline_price * st_state.exec_vol.sum() / ep_state.target
-        penalty = -100 * ((st_state.exec_vol / ep_state.target) ** 2).sum()  # penalize too much volume at one step
-        reward = pa + penalty
-        return reward, {'pa': pa, 'penalty': penalty}
-
-
-def _to_int32(val): return np.array(int(val), dtype=np.int32)
-def _to_float32(val): return np.array(val, dtype=np.float32)
-
-### End of RL strategy ###
-
-
-if __name__ == '__main__':
-    fire.Fire(RlWorkflow)
diff --git a/examples/nested_decision_execution/workflow.py b/examples/nested_decision_execution/workflow.py
index a90e7281c..b6c1362fd 100644
--- a/examples/nested_decision_execution/workflow.py
+++ b/examples/nested_decision_execution/workflow.py
@@ -1,7 +1,6 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.
 
-from typing import Optional
 
 import qlib
 import fire
@@ -171,17 +170,11 @@ class NestedDecisionExecutionWorkflow:
             sr = SignalRecord(model, dataset, recorder)
             sr.generate()
 
-    def _load_model(self, load):
-        return R.get_recorder(load, experiment_name="train").load_object("params.pkl")
-
-    def backtest(self, load_model: Optional[str] = None):
+    def backtest(self):
         self._init_qlib()
         model = init_instance_by_config(self.task["model"])
         dataset = init_instance_by_config(self.task["dataset"])
-        if load_model is None:
-            self._train_model(model, dataset)
-        else:
-            model = self._load_model(load_model)
+        self._train_model(model, dataset)
         strategy_config = {
             "class": "TopkDropoutStrategy",
             "module_path": "qlib.contrib.strategy.model_strategy",