Move the order-execution helpers out of simulator_qlib.py.

* qlib/rl/order_execution/from_neutrader/strategy.py is deleted. Its two classes,
  DecomposedStrategy and SingleOrderStrategy, are re-added verbatim in
  qlib/rl/order_execution/simulator_qlib.py.
* The module-level helpers of simulator_qlib.py (get_common_infra,
  _convert_tick_str_to_int, _get_ticks_slice, _get_minutes, _dataframe_append,
  _price_advantage) move to the new qlib/rl/order_execution/utils.py.
  get_common_infra is renamed to _get_common_infra and its call site in
  QlibSimulator is updated accordingly.
* Review changes to utils.py only: every helper gets a docstring and the file
  now ends with a newline. The blob id on its "index" line predates these
  changes.

diff --git a/qlib/rl/order_execution/from_neutrader/strategy.py b/qlib/rl/order_execution/from_neutrader/strategy.py
deleted file mode 100644
index ac6b111f9..000000000
--- a/qlib/rl/order_execution/from_neutrader/strategy.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from typing import List, Optional, Tuple
-
-from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange
-from qlib.backtest.utils import CommonInfrastructure
-from qlib.strategy.base import BaseStrategy
-
-
-class DecomposedStrategy(BaseStrategy):
-    def __init__(self) -> None:
-        super(DecomposedStrategy, self).__init__()
-
-        self.execute_order: Optional[Order] = None
-        self.execute_result: List[Tuple[Order, float, float, float]] = []
-
-    def generate_trade_decision(self, execute_result: list = None) -> BaseTradeDecision:
-        exec_vol = yield self
-
-        oh = self.trade_exchange.get_order_helper()
-        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
-
-        self.execute_order = order
-
-        return TradeDecisionWO([order], self)
-
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
-    def receive_execute_result(self, execute_result: list) -> None:
-        self.execute_result = execute_result
-
-    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs) -> None:
-        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
-        if outer_trade_decision is not None:
-            order_list = outer_trade_decision.order_list
-            assert len(order_list) == 1
-            self._order = order_list[0]
-
-
-class SingleOrderStrategy(BaseStrategy):
-    # this logic is copied from FileOrderStrategy
-    def __init__(
-        self,
-        common_infra: CommonInfrastructure,
-        order: Order,
-        trade_range: TradeRange,
-        instrument: str,
-    ) -> None:
-        super().__init__(common_infra=common_infra)
-        self._order = order
-        self._trade_range = trade_range
-        self._instrument = instrument
-
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
-    def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
-        oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
-        order_list = [
-            oh.create(
-                code=self._instrument,
-                amount=self._order.amount,
-                direction=Order.parse_dir(self._order.direction),
-            )
-        ]
-        return TradeDecisionWO(order_list, self, self._trade_range)
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 25999eb3b..4d455302b 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -4,116 +4,83 @@
 """Placeholder for qlib-based simulator."""
 from __future__ import annotations
 
-from typing import Any, Callable, Generator, List, Optional, cast
+from typing import Callable, Generator, List, Optional, Tuple, cast
 
 import numpy as np
 import pandas as pd
 
-from qlib.rl.order_execution.from_neutrader.feature import init_qlib
-from qlib.backtest import get_exchange
-from qlib.backtest.account import Account
-from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
+from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange, TradeRangeByTime
 from qlib.backtest.executor import BaseExecutor, NestedExecutor
 from qlib.backtest.utils import CommonInfrastructure
 from qlib.config import QlibConfig
 from qlib.constant import EPS
 from qlib.rl.order_execution.from_neutrader.config import ExchangeConfig
-from qlib.rl.order_execution.from_neutrader.strategy import DecomposedStrategy, SingleOrderStrategy
-from qlib.rl.order_execution.simulator_simple import ONE_SEC, SAOEMetrics, SAOEState, _float_or_ndarray
+from qlib.rl.order_execution.from_neutrader.feature import init_qlib
+from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState
+from qlib.rl.order_execution.utils import (_convert_tick_str_to_int, _dataframe_append, _get_common_infra, _get_minutes,
+                                           _get_ticks_slice, _price_advantage)
 from qlib.rl.simulator import Simulator
+from qlib.strategy.base import BaseStrategy
 
 
-def get_common_infra(
-    config: ExchangeConfig,
-    trade_date: pd.Timestamp,
-    codes: List[str],
-    cash_limit: Optional[float] = None,
-) -> CommonInfrastructure:
-    # need to specify a range here for acceleration
-    if cash_limit is None:
-        trade_account = Account(init_cash=int(1e12), benchmark_config={}, pos_type="InfPosition")
-    else:
-        trade_account = Account(
-            init_cash=cash_limit,
-            benchmark_config={},
-            pos_type="Position",
-            position_dict={code: {"amount": 1e12, "price": 1.0} for code in codes},
-        )
+class DecomposedStrategy(BaseStrategy):
+    def __init__(self) -> None:
+        super(DecomposedStrategy, self).__init__()
 
-    exchange = get_exchange(
-        codes=codes,
-        freq="1min",
-        limit_threshold=config.limit_threshold,
-        deal_price=config.deal_price,
-        open_cost=config.open_cost,
-        close_cost=config.close_cost,
-        min_cost=config.min_cost if config.trade_unit is not None else 0,
-        start_time=trade_date,
-        end_time=trade_date + pd.DateOffset(1),
-        trade_unit=config.trade_unit,
-        volume_threshold=config.volume_threshold,
-    )
+        self.execute_order: Optional[Order] = None
+        self.execute_result: List[Tuple[Order, float, float, float]] = []
 
-    return CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
+    def generate_trade_decision(self, execute_result: list = None) -> BaseTradeDecision:
+        exec_vol = yield self
+
+        oh = self.trade_exchange.get_order_helper()
+        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
+
+        self.execute_order = order
+
+        return TradeDecisionWO([order], self)
+
+    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        return outer_trade_decision
+
+    def receive_execute_result(self, execute_result: list) -> None:
+        self.execute_result = execute_result
+
+    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs) -> None:
+        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
+        if outer_trade_decision is not None:
+            order_list = outer_trade_decision.order_list
+            assert len(order_list) == 1
+            self._order = order_list[0]
 
 
-def _convert_tick_str_to_int(time_per_step: str) -> int:
-    d = {
-        "30min": 30,
-    }
-    return d[time_per_step]
+class SingleOrderStrategy(BaseStrategy):
+    # this logic is copied from FileOrderStrategy
+    def __init__(
+        self,
+        common_infra: CommonInfrastructure,
+        order: Order,
+        trade_range: TradeRange,
+        instrument: str,
+    ) -> None:
+        super().__init__(common_infra=common_infra)
+        self._order = order
+        self._trade_range = trade_range
+        self._instrument = instrument
 
+    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        return outer_trade_decision
 
-def _get_ticks_slice(
-    ticks_index: pd.DatetimeIndex,
-    start: pd.Timestamp,
-    end: pd.Timestamp,
-    include_end: bool = False,
-) -> pd.DatetimeIndex:
-    if not include_end:
-        end = end - ONE_SEC
-    return ticks_index[ticks_index.slice_indexer(start, end)]
-
-
-def _get_minutes(start_time: pd.Timestamp, end_time: pd.Timestamp) -> List[pd.Timestamp]:
-    minutes = []
-    t = start_time
-    while t <= end_time:
-        minutes.append(t)
-        t += pd.Timedelta("1min")
-    return minutes
-
-
-def _dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame:
-    # dataframe.append is deprecated
-    other_df = pd.DataFrame(other).set_index("datetime")
-    other_df.index.name = "datetime"
-
-    res = pd.concat([df, other_df], axis=0)
-    return res
-
-
-def _price_advantage(
-    exec_price: _float_or_ndarray,
-    baseline_price: float,
-    direction: OrderDir | int,
-) -> _float_or_ndarray:
-    if baseline_price == 0:  # something is wrong with data. Should be nan here
-        if isinstance(exec_price, float):
-            return 0.0
-        else:
-            return np.zeros_like(exec_price)
-    if direction == OrderDir.BUY:
-        res = (1 - exec_price / baseline_price) * 10000
-    elif direction == OrderDir.SELL:
-        res = (exec_price / baseline_price - 1) * 10000
-    else:
-        raise ValueError(f"Unexpected order direction: {direction}")
-    res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0)
-    if res_wo_nan.size == 1:
-        return res_wo_nan.item()
-    else:
-        return cast(_float_or_ndarray, res_wo_nan)
+    def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
+        oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
+        order_list = [
+            oh.create(
+                code=self._instrument,
+                amount=self._order.amount,
+                direction=Order.parse_dir(self._order.direction),
+            )
+        ]
+        return TradeDecisionWO(order_list, self, self._trade_range)
 
 
 class StateMaintainer:
@@ -264,7 +231,7 @@ class QlibSimulator(Simulator[Order, SAOEState, float]):
 
         init_qlib(self._qlib_config, instrument)
 
-        common_infra = get_common_infra(
+        common_infra = _get_common_infra(
             self._exchange_config,
             trade_date=pd.Timestamp(self._order_date),
             codes=[instrument],
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
new file mode 100644
index 000000000..dcfc03b31
--- /dev/null
+++ b/qlib/rl/order_execution/utils.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+from typing import Any, List, Optional, cast
+
+import numpy as np
+import pandas as pd
+
+from qlib.backtest import Account, CommonInfrastructure, get_exchange
+from qlib.backtest.decision import OrderDir
+from qlib.rl.order_execution.from_neutrader.config import ExchangeConfig
+from qlib.rl.order_execution.simulator_simple import ONE_SEC, _float_or_ndarray
+
+
+def _get_common_infra(
+    config: ExchangeConfig,
+    trade_date: pd.Timestamp,
+    codes: List[str],
+    cash_limit: Optional[float] = None,
+) -> CommonInfrastructure:
+    """Create a trade account and a 1-minute exchange, wrapped in a ``CommonInfrastructure``.
+
+    Without ``cash_limit`` the account uses the ``InfPosition`` position type
+    with 1e12 initial cash. Otherwise it starts with ``cash_limit`` cash and a
+    ``Position`` holding 1e12 units (at price 1.0) of every code in ``codes``.
+    The exchange spans ``trade_date`` to ``trade_date + pd.DateOffset(1)``.
+    """
+    # need to specify a range here for acceleration
+    if cash_limit is None:
+        trade_account = Account(init_cash=int(1e12), benchmark_config={}, pos_type="InfPosition")
+    else:
+        trade_account = Account(
+            init_cash=cash_limit,
+            benchmark_config={},
+            pos_type="Position",
+            position_dict={code: {"amount": 1e12, "price": 1.0} for code in codes},
+        )
+
+    exchange = get_exchange(
+        codes=codes,
+        freq="1min",
+        limit_threshold=config.limit_threshold,
+        deal_price=config.deal_price,
+        open_cost=config.open_cost,
+        close_cost=config.close_cost,
+        min_cost=config.min_cost if config.trade_unit is not None else 0,
+        start_time=trade_date,
+        end_time=trade_date + pd.DateOffset(1),
+        trade_unit=config.trade_unit,
+        volume_threshold=config.volume_threshold,
+    )
+
+    return CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
+
+
+def _convert_tick_str_to_int(time_per_step: str) -> int:
+    """Map a step-length string to its length in minutes.
+
+    Only ``"30min"`` is supported; any other value raises ``KeyError``.
+    """
+    d = {
+        "30min": 30,
+    }
+    return d[time_per_step]
+
+
+def _get_ticks_slice(
+    ticks_index: pd.DatetimeIndex,
+    start: pd.Timestamp,
+    end: pd.Timestamp,
+    include_end: bool = False,
+) -> pd.DatetimeIndex:
+    """Return the ticks of ``ticks_index`` lying between ``start`` and ``end``.
+
+    ``start`` is always included. ``end`` is included only when ``include_end``
+    is true; otherwise the upper bound is moved back by ``ONE_SEC``.
+    """
+    if not include_end:
+        end = end - ONE_SEC
+    return ticks_index[ticks_index.slice_indexer(start, end)]
+
+
+def _get_minutes(start_time: pd.Timestamp, end_time: pd.Timestamp) -> List[pd.Timestamp]:
+    """List every timestamp from ``start_time`` to ``end_time`` (inclusive) in 1-minute steps."""
+    minutes = []
+    t = start_time
+    while t <= end_time:
+        minutes.append(t)
+        t += pd.Timedelta("1min")
+    return minutes
+
+
+def _dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame:
+    """Append ``other`` to ``df`` and return the result as a new frame.
+
+    ``other`` is converted with ``pd.DataFrame(other)`` and must provide a
+    ``"datetime"`` column, which becomes the index of the appended rows.
+    """
+    # dataframe.append is deprecated
+    other_df = pd.DataFrame(other).set_index("datetime")
+    other_df.index.name = "datetime"
+
+    res = pd.concat([df, other_df], axis=0)
+    return res
+
+
+def _price_advantage(
+    exec_price: _float_or_ndarray,
+    baseline_price: float,
+    direction: OrderDir | int,
+) -> _float_or_ndarray:
+    """Return the price advantage of ``exec_price`` over ``baseline_price`` in basis points.
+
+    A positive value means buying below, or selling above, the baseline.
+    A zero baseline yields 0 (zeros for array input) and NaN results are replaced
+    by 0. Raises ``ValueError`` if ``direction`` is neither BUY nor SELL.
+    """
+    if baseline_price == 0:  # something is wrong with data. Should be nan here
+        if isinstance(exec_price, float):
+            return 0.0
+        else:
+            return np.zeros_like(exec_price)
+    if direction == OrderDir.BUY:
+        res = (1 - exec_price / baseline_price) * 10000
+    elif direction == OrderDir.SELL:
+        res = (exec_price / baseline_price - 1) * 10000
+    else:
+        raise ValueError(f"Unexpected order direction: {direction}")
+    res_wo_nan: np.ndarray = np.nan_to_num(res, nan=0.0)
+    if res_wo_nan.size == 1:
+        return res_wo_nan.item()
+    else:
+        return cast(_float_or_ndarray, res_wo_nan)