1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00
Files
qlib/qlib/contrib/evaluate.py
Linlang 3097dcc995 fix(security): use RestrictedUnpickler in load_instance (#2153)
* fix(security): enforce RestrictedUnpickler for load_instance to prevent unsafe pickle deserialization

* fix: lint error
2026-03-10 20:45:38 +08:00

420 lines
14 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import warnings
from typing import Union, Literal
from ..log import get_module_logger
from ..utils import get_date_range
from ..utils.resam import Freq
from ..strategy.base import BaseStrategy
from ..backtest import get_exchange, position, backtest as backtest_func, executor as _executor
from ..data import D
from ..config import C
from ..data.dataset.utils import get_level_index
logger = get_module_logger("Evaluate")
def risk_analysis(r, N: int = None, freq: str = "day", mode: Literal["sum", "product"] = "sum"):
"""Risk Analysis
NOTE:
The calculation of annualized return is different from the definition of annualized return.
It is implemented by design.
Qlib tries to cumulate returns by summation instead of production to avoid the cumulated curve being skewed exponentially.
All the calculation of annualized returns follows this principle in Qlib.
Parameters
----------
r : pandas.Series
daily return series.
N: int
scaler for annualizing information_ratio (day: 252, week: 50, month: 12), at least one of `N` and `freq` should exist
freq: str
analysis frequency used for calculating the scaler, at least one of `N` and `freq` should exist
mode: Literal["sum", "product"]
the method by which returns are accumulated:
- "sum": Arithmetic accumulation (linear returns).
- "product": Geometric accumulation (compounded returns).
"""
def cal_risk_analysis_scaler(freq):
_count, _freq = Freq.parse(freq)
_freq_scaler = {
Freq.NORM_FREQ_MINUTE: 240 * 238,
Freq.NORM_FREQ_DAY: 238,
Freq.NORM_FREQ_WEEK: 50,
Freq.NORM_FREQ_MONTH: 12,
}
return _freq_scaler[_freq] / _count
if N is None and freq is None:
raise ValueError("at least one of `N` and `freq` should exist")
if N is not None and freq is not None:
warnings.warn("risk_analysis freq will be ignored")
if N is None:
N = cal_risk_analysis_scaler(freq)
if mode == "sum":
mean = r.mean()
std = r.std(ddof=1)
annualized_return = mean * N
max_drawdown = (r.cumsum() - r.cumsum().cummax()).min()
elif mode == "product":
cumulative_curve = (1 + r).cumprod()
# geometric mean (compound annual growth rate)
mean = cumulative_curve.iloc[-1] ** (1 / len(r)) - 1
# volatility of log returns
std = np.log(1 + r).std(ddof=1)
cumulative_return = cumulative_curve.iloc[-1] - 1
annualized_return = (1 + cumulative_return) ** (N / len(r)) - 1
# max percentage drawdown from peak cumulative product
max_drawdown = (cumulative_curve / cumulative_curve.cummax() - 1).min()
else:
raise ValueError(f"risk_analysis accumulation mode {mode} is not supported. Expected `sum` or `product`.")
information_ratio = mean / std * np.sqrt(N)
data = {
"mean": mean,
"std": std,
"annualized_return": annualized_return,
"information_ratio": information_ratio,
"max_drawdown": max_drawdown,
}
res = pd.Series(data).to_frame("risk")
return res
def indicator_analysis(df, method="mean"):
"""analyze statistical time-series indicators of trading
Parameters
----------
df : pandas.DataFrame
columns: like ['pa', 'pos', 'ffr', 'deal_amount', 'value'].
Necessary fields:
- 'pa' is the price advantage in trade indicators
- 'pos' is the positive rate in trade indicators
- 'ffr' is the fulfill rate in trade indicators
Optional fields:
- 'deal_amount' is the total deal deal_amount, only necessary when method is 'amount_weighted'
- 'value' is the total trade value, only necessary when method is 'value_weighted'
index: Index(datetime)
method : str, optional
statistics method of pa/ffr, by default "mean"
- if method is 'mean', count the mean statistical value of each trade indicator
- if method is 'amount_weighted', count the deal_amount weighted mean statistical value of each trade indicator
- if method is 'value_weighted', count the value weighted mean statistical value of each trade indicator
Note: statistics method of pos is always "mean"
Returns
-------
pd.DataFrame
statistical value of each trade indicators
"""
weights_dict = {
"mean": df["count"],
"amount_weighted": df["deal_amount"].abs(),
"value_weighted": df["value"].abs(),
}
if method not in weights_dict:
raise ValueError(f"indicator_analysis method {method} is not supported!")
# statistic pa/ffr indicator
indicators_df = df[["ffr", "pa"]]
weights = weights_dict.get(method)
res = indicators_df.mul(weights, axis=0).sum() / weights.sum()
# statistic pos
weights = weights_dict.get("mean")
res.loc["pos"] = df["pos"].mul(weights).sum() / weights.sum()
res = res.to_frame("value")
return res
# This is the API for compatibility for legacy code
def backtest_daily(
start_time: Union[str, pd.Timestamp],
end_time: Union[str, pd.Timestamp],
strategy: Union[str, dict, BaseStrategy],
executor: Union[str, dict, _executor.BaseExecutor] = None,
account: Union[float, int, position.Position] = 1e8,
benchmark: str = "SH000300",
exchange_kwargs: dict = None,
pos_type: str = "Position",
):
"""initialize the strategy and executor, then executor the backtest of daily frequency
Parameters
----------
start_time : Union[str, pd.Timestamp]
closed start time for backtest
**NOTE**: This will be applied to the outmost executor's calendar.
end_time : Union[str, pd.Timestamp]
closed end time for backtest
**NOTE**: This will be applied to the outmost executor's calendar.
E.g. Executor[day](Executor[1min]), setting `end_time == 20XX0301` will include all the minutes on 20XX0301
strategy : Union[str, dict, BaseStrategy]
for initializing outermost portfolio strategy. Please refer to the docs of init_instance_by_config for more information.
E.g.
.. code-block:: python
# dict
strategy = {
"class": "TopkDropoutStrategy",
"module_path": "qlib.contrib.strategy.signal_strategy",
"kwargs": {
"signal": (model, dataset),
"topk": 50,
"n_drop": 5,
},
}
# BaseStrategy
pred_score = pd.read_pickle("score.pkl")["score"]
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
"signal": pred_score,
}
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
# str example.
# 1) specify a pickle object
# - path like 'file:///<path to pickle file>/obj.pkl'
# 2) specify a class name
# - "ClassName": getattr(module, "ClassName")() will be used.
# 3) specify module path with class name
# - "a.b.c.ClassName" getattr(<a.b.c.module>, "ClassName")() will be used.
executor : Union[str, dict, BaseExecutor]
for initializing the outermost executor.
benchmark: str
the benchmark for reporting.
account : Union[float, int, Position]
information for describing how to creating the account
For `float` or `int`:
Using Account with only initial cash
For `Position`:
Using Account with a Position
exchange_kwargs : dict
the kwargs for initializing Exchange
E.g.
.. code-block:: python
exchange_kwargs = {
"freq": freq,
"limit_threshold": None, # limit_threshold is None, using C.limit_threshold
"deal_price": None, # deal_price is None, using C.deal_price
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
pos_type : str
the type of Position.
Returns
-------
report_normal: pd.DataFrame
backtest report
positions_normal: pd.DataFrame
backtest positions
"""
freq = "day"
if executor is None:
executor_config = {
"time_per_step": freq,
"generate_portfolio_metrics": True,
}
executor = _executor.SimulatorExecutor(**executor_config)
_exchange_kwargs = {
"freq": freq,
"limit_threshold": None,
"deal_price": None,
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
}
if exchange_kwargs is not None:
_exchange_kwargs.update(exchange_kwargs)
portfolio_metric_dict, indicator_dict = backtest_func(
start_time=start_time,
end_time=end_time,
strategy=strategy,
executor=executor,
account=account,
benchmark=benchmark,
exchange_kwargs=_exchange_kwargs,
pos_type=pos_type,
)
analysis_freq = "{0}{1}".format(*Freq.parse(freq))
report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)
return report_normal, positions_normal
def long_short_backtest(
pred,
topk=50,
deal_price=None,
shift=1,
open_cost=0,
close_cost=0,
trade_unit=None,
limit_threshold=None,
min_cost=5,
subscribe_fields=[],
extract_codes=False,
):
"""
A backtest for long-short strategy
:param pred: The trading signal produced on day `T`.
:param topk: The short topk securities and long topk securities.
:param deal_price: The price to deal the trading.
:param shift: Whether to shift prediction by one day. The trading day will be T+1 if shift==1.
:param open_cost: open transaction cost.
:param close_cost: close transaction cost.
:param trade_unit: 100 for China A.
:param limit_threshold: limit move 0.1 (10%) for example, long and short with same limit.
:param min_cost: min transaction cost.
:param subscribe_fields: subscribe fields.
:param extract_codes: bool.
will we pass the codes extracted from the pred to the exchange.
NOTE: This will be faster with offline qlib.
:return: The result of backtest, it is represented by a dict.
{ "long": long_returns(excess),
"short": short_returns(excess),
"long_short": long_short_returns}
"""
if get_level_index(pred, level="datetime") == 1:
pred = pred.swaplevel().sort_index()
if trade_unit is None:
trade_unit = C.trade_unit
if limit_threshold is None:
limit_threshold = C.limit_threshold
if deal_price is None:
deal_price = C.deal_price
if deal_price[0] != "$":
deal_price = "$" + deal_price
subscribe_fields = subscribe_fields.copy()
profit_str = f"Ref({deal_price}, -1)/{deal_price} - 1"
subscribe_fields.append(profit_str)
trade_exchange = get_exchange(
pred=pred,
deal_price=deal_price,
subscribe_fields=subscribe_fields,
limit_threshold=limit_threshold,
open_cost=open_cost,
close_cost=close_cost,
min_cost=min_cost,
trade_unit=trade_unit,
extract_codes=extract_codes,
shift=shift,
)
_pred_dates = pred.index.get_level_values(level="datetime")
predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max())
trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))
long_returns = {}
short_returns = {}
ls_returns = {}
for pdate, date in zip(predict_dates, trade_dates):
score = pred.loc(axis=0)[pdate, :]
score = score.reset_index().sort_values(by="score", ascending=False)
long_stocks = list(score.iloc[:topk]["instrument"])
short_stocks = list(score.iloc[-topk:]["instrument"])
score = score.set_index(["datetime", "instrument"]).sort_index()
long_profit = []
short_profit = []
all_profit = []
for stock in long_stocks:
if not trade_exchange.is_stock_tradable(stock_id=stock, trade_date=date):
continue
profit = trade_exchange.get_quote_info(stock_id=stock, start_time=date, end_time=date, field=profit_str)
if np.isnan(profit):
long_profit.append(0)
else:
long_profit.append(profit)
for stock in short_stocks:
if not trade_exchange.is_stock_tradable(stock_id=stock, trade_date=date):
continue
profit = trade_exchange.get_quote_info(stock_id=stock, start_time=date, end_time=date, field=profit_str)
if np.isnan(profit):
short_profit.append(0)
else:
short_profit.append(profit * -1)
for stock in list(score.loc(axis=0)[pdate, :].index.get_level_values(level=0)):
# exclude the suspend stock
if trade_exchange.check_stock_suspended(stock_id=stock, trade_date=date):
continue
profit = trade_exchange.get_quote_info(stock_id=stock, start_time=date, end_time=date, field=profit_str)
if np.isnan(profit):
all_profit.append(0)
else:
all_profit.append(profit)
long_returns[date] = np.mean(long_profit) - np.mean(all_profit)
short_returns[date] = np.mean(short_profit) + np.mean(all_profit)
ls_returns[date] = np.mean(short_profit) + np.mean(long_profit)
return dict(
zip(
["long", "short", "long_short"],
map(pd.Series, [long_returns, short_returns, ls_returns]),
)
)
def t_run():
pred_FN = "./check_pred.csv"
pred: pd.DataFrame = pd.read_csv(pred_FN)
pred["datetime"] = pd.to_datetime(pred["datetime"])
pred = pred.set_index([pred.columns[0], pred.columns[1]])
pred = pred.iloc[:9000]
strategy_config = {
"topk": 50,
"n_drop": 5,
"signal": pred,
}
report_df, positions = backtest_daily(start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_config)
print(report_df.head())
print(positions.keys())
print(positions[list(positions.keys())[0]])
return 0
if __name__ == "__main__":
t_run()