Qlib simulator refinement (redo of PR 1244) (#1262)

* Use dict-like configuration
* Rename from_neutrader to integration
* SAOE strategy
* Optimize file structure
* Optimize code
* Format code
* create_state_maintainer_recursive
* Remove explicit time_per_step
* CI test passed
* Resolve PR comments
* Pass all CI
* Minor test issue
* Refine SAOE adapter logic
* Minor bugfix
* Cherry pick updates
* Resolve PR comments
* CI issues
* Refine adapter & saoe_data logic
* Resolve PR comments
* Resolve PR comments
* Rename ONE_SEC to EPS_T; complete backtest loop
* CI issue
* Resolve Yuge's PR comments
2026-07-22 03:37:34 +08:00 · 2022-08-24 14:09:45 +08:00
parent e78fe48a26
commit 1d65d28b28
26 changed files with 1009 additions and 772 deletions
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -1,17 +1,16 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+
 import sys
 from pathlib import Path
+from typing import Tuple

 import pandas as pd
 import pytest
-
-from qlib.backtest.decision import Order, OrderDir
-from qlib.backtest.executor import NestedExecutor, SimulatorExecutor
-from qlib.backtest.utils import CommonInfrastructure
-from qlib.contrib.strategy import TWAPStrategy
+from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
+from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
-from qlib.rl.order_execution.simulator_qlib import ExchangeConfig, SingleAssetOrderExecutionQlib
+from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecution

 TOTAL_POSITION = 2100.0

@@ -32,23 +31,71 @@ def get_order() -> Order:
    )


-def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
-    def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) -> NestedExecutor:
-        return NestedExecutor(
-            time_per_step=time_per_step,
-            inner_strategy=TWAPStrategy(),
-            inner_executor=SimulatorExecutor(
-                time_per_step="1min",
-                verbose=False,
-                trade_type=SimulatorExecutor.TT_SERIAL,
-                generate_report=False,
-                common_infra=common_infra,
-                track_data=True,
-            ),
-            common_infra=common_infra,
-            track_data=True,
-        )
+def get_configs(order: Order) -> Tuple[dict, dict, dict]:
+    strategy_config = {
+        "class": "SingleOrderStrategy",
+        "module_path": "qlib.rl.strategy.single_order",
+        "kwargs": {
+            "order": order,
+            "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()),
+        },
+    }

+    executor_config = {
+        "class": "NestedExecutor",
+        "module_path": "qlib.backtest.executor",
+        "kwargs": {
+            "time_per_step": "1day",
+            "inner_strategy": {"class": "ProxySAOEStrategy", "module_path": "qlib.rl.order_execution.strategy"},
+            "track_data": True,
+            "inner_executor": {
+                "class": "NestedExecutor",
+                "module_path": "qlib.backtest.executor",
+                "kwargs": {
+                    "time_per_step": "30min",
+                    "inner_strategy": {
+                        "class": "TWAPStrategy",
+                        "module_path": "qlib.contrib.strategy.rule_strategy",
+                    },
+                    "inner_executor": {
+                        "class": "SimulatorExecutor",
+                        "module_path": "qlib.backtest.executor",
+                        "kwargs": {
+                            "time_per_step": "1min",
+                            "verbose": False,
+                            "trade_type": SimulatorExecutor.TT_SERIAL,
+                            "generate_report": False,
+                            "track_data": True,
+                        },
+                    },
+                    "track_data": True,
+                },
+            },
+            "start_time": pd.Timestamp(order.start_time.date()),
+            "end_time": pd.Timestamp(order.start_time.date()),
+        },
+    }
+
+    exchange_config = {
+        "freq": "1min",
+        "codes": [order.stock_id],
+        "limit_threshold": ("$ask == 0", "$bid == 0"),
+        "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
+        "volume_threshold": {
+            "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"),
+            "buy": ("current", "$askV1"),
+            "sell": ("current", "$bidV1"),
+        },
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5.0,
+        "trade_unit": None,
+    }
+
+    return strategy_config, executor_config, exchange_config
+
+
+def get_simulator(order: Order) -> SingleAssetOrderExecution:
    DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator"

    # fmt: off
@@ -67,27 +114,13 @@ def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
    }
    # fmt: on

-    exchange_config = ExchangeConfig(
-        limit_threshold=("$ask == 0", "$bid == 0"),
-        deal_price=("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
-        volume_threshold={
-            "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"),
-            "buy": ("current", "$askV1"),
-            "sell": ("current", "$bidV1"),
-        },
-        open_cost=0.0005,
-        close_cost=0.0015,
-        min_cost=5.0,
-        trade_unit=None,
-        cash_limit=None,
-        generate_report=False,
-    )
+    strategy_config, executor_config, exchange_config = get_configs(order)

-    return SingleAssetOrderExecutionQlib(
+    return SingleAssetOrderExecution(
        order=order,
-        time_per_step="30min",
        qlib_config=qlib_config,
-        inner_executor_fn=_inner_executor_fn,
+        strategy_config=strategy_config,
+        executor_config=executor_config,
        exchange_config=exchange_config,
    )

@@ -115,12 +148,12 @@ def test_simulator_first_step():
    assert is_close(state.history_exec["trade_price"].iloc[0], 149.566483)
    assert is_close(state.history_exec["trade_value"].iloc[0], 1495.664825)
    assert is_close(state.history_exec["position"].iloc[0], TOTAL_POSITION - AMOUNT / 30)
-    # assert state.history_exec["ffr"].iloc[0] == 1 / 60  # FIXME
+    assert is_close(state.history_exec["ffr"].iloc[0], AMOUNT / TOTAL_POSITION / 30)

    assert is_close(state.history_steps["market_volume"].iloc[0], 1254848.5756835938)
    assert state.history_steps["amount"].iloc[0] == AMOUNT
    assert state.history_steps["deal_amount"].iloc[0] == AMOUNT
-    assert state.history_steps["ffr"].iloc[0] == 1.0
+    assert state.history_steps["ffr"].iloc[0] == AMOUNT / TOTAL_POSITION
    assert is_close(
        state.history_steps["pa"].iloc[0] * (1.0 if order.direction == OrderDir.SELL else -1.0),
        (state.history_steps["trade_price"].iloc[0] / simulator.twap_price - 1) * 10000,
@@ -169,9 +202,3 @@ def test_interpreter() -> None:
        position_history.append(state.position)

        assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0)
-
-
-if __name__ == "__main__":
-    test_simulator_first_step()
-    test_simulator_stop_twap()
-    test_interpreter()