mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-04 19:41:00 +08:00
* MVP for Indian Stocks in qlib using yahooquery * cleaned with black * cleaned with black * add YahooNormalizeIN and YahooNormalizeIN1d * cleaned the code * added 1min for IN and also updated readme * update comments * fix comments * recorder support upload both raw file and directory * fix comments * Update README.md * Fix docs of QlibRecorder * sort index after loader (#538) make sure the fetch method is based on a index-sorted pd.DataFrame * refactor online serving rolling api * refactor TRA * format by black * fix horizon * fix TRA when use single head * clean up * improve pretrain * update README * fix tra when logdir is None * fix tra when logdir is None * Update strategy.py * Update README.md * Update README.md * Conda Suggestion * code standard docs * Update ensemble.py (#560) * Fix CI Bug (#575) Co-authored-by: yuxwang <anduinnn@foxmail.com> * Update gen.py (#576) * Fix multi-process loop calls (#574) * check lexsort in the 'lazy_sort_index' function (#566) * check lexsort * check lexsort * lexsort comment * lexsort comment * Delete .DS_Store * Update README.md * bug fix & use oracle transport pretrain * mend * Add `backend_freq_config` parameter, support multi-freq uri * Add sample_config to QlibDataLoader, support multi-freq * add multi-freq example * get_cls_kwargs renamed get_callable_kwargs * support multi-freq uri * Add inst_processors to D.features * Fix typo * Fix the index type of the multi-freq example * Fix duplicate mlflow directories in tests * Add DataPathManager to QlibConfig && modify inst_processors to supports list only * Modify the default value in the multi_freq example * Modify client-server mode and dataset-cache to disable inst_processor * Add wheel package to github CI * fix comment * Update FAQ.rst * Update README.md Fix wrong link * Update the docs of TaskManager (#586) * Update manage.py * update yaml * update run_all_model * Modify the Feature to be case sensitive (#589) * update README * remove verbose * fix spell bug * fix 
typos (#592) * Update Release Note * fix portfolio bug * Add calendar support for resample * add freq kwargs * test.yml: Remove redundant code (#595) * Supporting shared processor (#596) * Supporting shared processor * fix readonly reverse bug * remove pytests dependency * with fit bug * fix parameter error * fix comments * Fix undefined names in Python code (#599) * Update pytorch_tabnet.py $ `flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics` ``` ./qlib/qlib/contrib/model/pytorch_tabnet.py:567:38: F821 undefined name 'inp' self.independ.append(GLU(inp, out_dim, vbs=vbs)) ^ ./qlib/examples/model_rolling/task_manager_rolling.py:75:18: F821 undefined name 'task_train' run_task(task_train, self.task_pool, experiment_name=self.experiment_name) ^ 2 F821 undefined name 'task_train' 2 ``` * Fix undefined names in Python code * from qlib.model.trainer import task_train * update seed * fix some docstring * add comments * Fix SimpleDatasetCache * Update setup.py updated classifiers * Update setup.py change to matplotlib==3.3 * Update python-publish.yml added python 3.9 * updategrade version number * Update model list * fix the type of filter_pipe * fix comment * fix record_temp * update cvxpy version * Update code_standard.rst (#587) * Update code_standard.rst * Update docs/developer/code_standard.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Add file lock for MLflowExpManager (#619) * fix torch version * Share version number (#620) * Update initialization.rst (#622) * Update initialization.rst * Update docs/start/initialization.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Update docs/start/initialization.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * fix bugs for running previous exmaple * fix deal amount bug * update change doc (#623) * Add files via upload * Update README.md * 
Update README.md * Update README.md * Delete change doc.gif * Add files via upload * Update README.md * Delete change doc.gif * Add files via upload * Delete change doc.gif * Add files via upload * Update README.md Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * update doc * simplify run all model * fix run all model bug * Fix Models (#483) * fix gat dataset * fix tft model * Update tft.py * Fix tft.py Co-authored-by: Pengrong Zhu <zhu.pengrong@foxmail.com> * type and skip empty exp * fix model yaml config * fix tft import bug * skip empty result * fix model and yaml bug * fix wrong generate parameter * Modify multi-freq example (#626) * modify the example of multi-freq * add Copyright * add a comment to average_ops.py * modify the example of multi-freq * add comment to multi_freq_handler.py * add the Ref expression description to multi_freq_handler.py * add expression description to multi_freq_handler.py * update images * fix workflow and update framework Co-authored-by: Gaurav <2796gaurav@gmail.com> Co-authored-by: 2796gaurav <17353992+2796gaurav@users.noreply.github.com> Co-authored-by: bxdd <bxd98@126.com> Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> Co-authored-by: Dong Zhou <Zhou.Dong@microsoft.com> Co-authored-by: ZhangTP1996 <ztp18@mails.tsinghua.edu.cn> Co-authored-by: demon143 <59681577+demon143@users.noreply.github.com> Co-authored-by: Wangwuyi123 <51237097+Wangwuyi123@users.noreply.github.com> Co-authored-by: yuxwang <anduinnn@foxmail.com> Co-authored-by: Pengrong Zhu <zhu.pengrong@foxmail.com> Co-authored-by: Mark Zhao <50850474+markzhao98@users.noreply.github.com> Co-authored-by: cslwqxx <cslwqxx@users.noreply.github.com> Co-authored-by: Dong Zhou <evanzd@users.noreply.github.com> Co-authored-by: SaintMalik <37118134+saintmalik@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: 
Anurag Kumar <mailanu98@gmail.com> Co-authored-by: demon143 <785696300@qq.com>
339 lines
12 KiB
Python
339 lines
12 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT License.
|
|
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from logging import warn
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import warnings
|
|
from ..log import get_module_logger
|
|
from ..backtest import get_exchange, backtest as backtest_func
|
|
from ..utils import get_date_range
|
|
from ..utils.resam import Freq
|
|
|
|
from ..data import D
|
|
from ..config import C
|
|
from ..data.dataset.utils import get_level_index
|
|
|
|
|
|
# Module-level logger, registered under the name "Evaluate".
logger = get_module_logger("Evaluate")
|
|
|
|
|
|
def risk_analysis(r, N: int = None, freq: str = "day"):
    """Summarize the risk profile of a return series.

    Parameters
    ----------
    r : pandas.Series
        return series, one value per period.
    N: int
        scaler (number of periods per year) used to annualize the return and the
        information ratio. When omitted it is derived from `freq`.
        At least one of `N` and `freq` should exist.
    freq: str
        analysis frequency used for calculating the scaler when `N` is omitted.
        The built-in table maps minute -> 240 * 238, day -> 238, week -> 50 and
        month -> 12; the table value is divided by the count parsed from `freq`.
        Ignored (with a warning) when `N` is given.

    Returns
    -------
    pandas.DataFrame
        a single column named "risk" indexed by "mean", "std", "annualized_return",
        "information_ratio" and "max_drawdown".

    Raises
    ------
    ValueError
        if both `N` and `freq` are None.
    """
    if N is None and freq is None:
        raise ValueError("at least one of `N` and `freq` should exist")
    if not (N is None or freq is None):
        warnings.warn("risk_analysis freq will be ignored")

    if N is None:
        # Derive the annualizing scaler from the frequency string.
        # len(D.calendar(start_time='2010-01-01', end_time='2019-12-31', freq='day')) = 2384
        period_count, norm_freq = Freq.parse(freq)
        periods_per_year = {
            Freq.NORM_FREQ_MINUTE: 240 * 238,
            Freq.NORM_FREQ_DAY: 238,
            Freq.NORM_FREQ_WEEK: 50,
            Freq.NORM_FREQ_MONTH: 12,
        }
        N = periods_per_year[norm_freq] / period_count

    avg = r.mean()
    vol = r.std(ddof=1)

    # Drawdown is measured on the cumulative (summed, not compounded) return curve.
    cumulative = r.cumsum()
    drawdown = cumulative - cumulative.cummax()

    summary = pd.Series(
        {
            "mean": avg,
            "std": vol,
            "annualized_return": avg * N,
            "information_ratio": avg / vol * np.sqrt(N),
            "max_drawdown": drawdown.min(),
        }
    )
    return summary.to_frame("risk")
|
|
|
|
|
|
def indicator_analysis(df, method="mean"):
    """Analyze statistical time-series indicators of trading.

    Parameters
    ----------
    df : pandas.DataFrame
        columns: like ['pa', 'pos', 'ffr', 'count', 'deal_amount', 'value'].
            Necessary fields:
                - 'pa' is the price advantage in trade indicators
                - 'pos' is the positive rate in trade indicators
                - 'ffr' is the fulfill rate in trade indicators
                - 'count' is the weight used for 'pos' and for the 'mean' method
            Optional fields:
                - 'deal_amount' is the total deal amount, only necessary when method is 'amount_weighted'
                - 'value' is the total trade value, only necessary when method is 'value_weighted'

        index: Index(datetime)
    method : str, optional
        statistics method of pa/ffr, by default "mean"
        - if method is 'mean', count the 'count'-weighted mean of each trade indicator
        - if method is 'amount_weighted', count the deal_amount weighted mean of each trade indicator
        - if method is 'value_weighted', count the value weighted mean of each trade indicator
        Note: statistics method of pos is always "mean"

    Returns
    -------
    pd.DataFrame
        statistical value of each trade indicator, in a single column named "value"
        indexed by "ffr", "pa" and "pos".

    Raises
    ------
    ValueError
        if `method` is not one of the supported methods.
    """
    # Weights are looked up lazily so that the optional columns are only required
    # by the method that actually uses them.
    weight_getters = {
        "mean": lambda: df["count"],
        "amount_weighted": lambda: df["deal_amount"].abs(),
        "value_weighted": lambda: df["value"].abs(),
    }
    if method not in weight_getters:
        raise ValueError(f"indicator_analysis method {method} is not supported!")

    # statistic pa/ffr indicator
    weights = weight_getters[method]()
    res = df[["ffr", "pa"]].mul(weights, axis=0).sum() / weights.sum()

    # statistic pos: always weighted by 'count', whatever `method` is
    count_weights = weight_getters["mean"]()
    res.loc["pos"] = df["pos"].mul(count_weights).sum() / count_weights.sum()

    return res.to_frame("value")
|
|
|
|
|
|
# This is the API for compatibility for legacy code
|
|
def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs):
    """Deprecated wrapper that forwards to the backtest function in ``qlib.backtest``.

    This function will help you set a reasonable Exchange and provide default value for strategy.
    All extra keyword arguments are forwarded unchanged to the underlying backtest function.

    Parameters
    ----------

    - **backtest workflow related or common arguments**

    pred : pandas.DataFrame
        prediction should have <datetime, instrument> index and one `score` column.
    account : float
        init account value.
    shift : int
        whether to shift prediction by one day.
    benchmark : str
        benchmark code, default is SH000905 CSI 500.
    verbose : bool
        whether to print log.

    - **strategy related arguments**

    strategy : Strategy()
        strategy used in backtest.
    topk : int (Default value: 50)
        top-N stocks to buy.
    margin : int or float(Default value: 0.5)
        - if isinstance(margin, int):

            sell_limit = margin

        - else:

            sell_limit = pred_in_a_day.count() * margin

        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
        sell_limit should be no less than topk.
    n_drop : int
        number of stocks to be replaced in each trading date.
    risk_degree: float
        0-1, 0.95 for example, use 95% money to trade.
    str_type: 'amount', 'weight' or 'dropout'
        strategy type: TopkAmountStrategy, TopkWeightStrategy or TopkDropoutStrategy.

    - **exchange related arguments**

    exchange: Exchange()
        pass the exchange for speeding up.
    subscribe_fields: list
        subscribe fields.
    open_cost : float
        open transaction cost. The default value is 0.002(0.2%).
    close_cost : float
        close transaction cost. The default value is 0.002(0.2%).
    min_cost : float
        min transaction cost.
    trade_unit : int
        100 for China A.
    deal_price: str
        dealing price type: 'close', 'open', 'vwap'.
    limit_threshold : float
        limit move 0.1 (10%) for example, long and short with same limit.
    extract_codes: bool
        will we pass the codes extracted from the pred to the exchange.

        .. note:: This will be faster with offline qlib.

    - **executor related arguments**

    executor : BaseExecutor()
        executor used in backtest.
    verbose : bool
        whether to print log.

    Returns
    -------
    tuple
        ``(report_df, positions)``: the "report_df" and "positions" entries of the
        dict returned by the underlying backtest function (``None`` for a missing entry).
    """
    # stacklevel=2 attributes the warning to the caller of this wrapper, so the
    # deprecated call site is the one reported.
    warnings.warn(
        "this function is deprecated, please use backtest function in qlib.backtest",
        DeprecationWarning,
        stacklevel=2,
    )
    report_dict = backtest_func(
        pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs
    )
    return report_dict.get("report_df"), report_dict.get("positions")
|
|
|
|
|
|
def long_short_backtest(
    pred,
    topk=50,
    deal_price=None,
    shift=1,
    open_cost=0,
    close_cost=0,
    trade_unit=None,
    limit_threshold=None,
    min_cost=5,
    subscribe_fields=None,
    extract_codes=False,
):
    """
    A backtest for long-short strategy

    :param pred:        The trading signal produced on day `T`.
    :param topk:       The short topk securities and long topk securities.
    :param deal_price:  The price to deal the trading. Defaults to `C.deal_price`;
                        a leading "$" is added when missing.
    :param shift:       Whether to shift prediction by one day.  The trading day will be T+1 if shift==1.
    :param open_cost:   open transaction cost.
    :param close_cost:  close transaction cost.
    :param trade_unit:  100 for China A. Defaults to `C.trade_unit`.
    :param limit_threshold: limit move 0.1 (10%) for example, long and short with same limit.
                        Defaults to `C.limit_threshold`.
    :param min_cost:    min transaction cost.
    :param subscribe_fields: subscribe fields. The given sequence is copied, never modified;
                        None means no extra fields.
    :param extract_codes:  bool.
                       will we pass the codes extracted from the pred to the exchange.
                       NOTE: This will be faster with offline qlib.
    :return:            The result of backtest, it is represented by a dict.
                        { "long": long_returns(excess),
                        "short": short_returns(excess),
                        "long_short": long_short_returns}
                        Each value is a pandas.Series keyed by trade date.
    """
    # Make sure the index order is <datetime, instrument>.
    if get_level_index(pred, level="datetime") == 1:
        pred = pred.swaplevel().sort_index()

    if trade_unit is None:
        trade_unit = C.trade_unit
    if limit_threshold is None:
        limit_threshold = C.limit_threshold
    if deal_price is None:
        deal_price = C.deal_price
    if deal_price[0] != "$":
        deal_price = "$" + deal_price

    # Work on a private copy so that the caller's sequence is never modified.
    subscribe_fields = [] if subscribe_fields is None else list(subscribe_fields)
    profit_str = f"Ref({deal_price}, -1)/{deal_price} - 1"
    subscribe_fields.append(profit_str)

    trade_exchange = get_exchange(
        pred=pred,
        deal_price=deal_price,
        subscribe_fields=subscribe_fields,
        limit_threshold=limit_threshold,
        open_cost=open_cost,
        close_cost=close_cost,
        min_cost=min_cost,
        trade_unit=trade_unit,
        extract_codes=extract_codes,
        shift=shift,
    )

    _pred_dates = pred.index.get_level_values(level="datetime")
    predict_dates = D.calendar(start_time=_pred_dates.min(), end_time=_pred_dates.max())
    trade_dates = np.append(predict_dates[shift:], get_date_range(predict_dates[-1], left_shift=1, right_shift=shift))

    def _is_untradable(stock_id, trade_date):
        # Exclusion rule for the long and short legs.
        return not trade_exchange.is_stock_tradable(stock_id=stock_id, trade_date=trade_date)

    def _collect_profits(stocks, trade_date, is_excluded, sign=1):
        # Gather the signed profit of every non-excluded stock; a NaN profit counts as 0.
        profits = []
        for stock in stocks:
            if is_excluded(stock_id=stock, trade_date=trade_date):
                continue
            profit = trade_exchange.get_quote_info(stock_id=stock, trade_date=trade_date)[profit_str]
            profits.append(0 if np.isnan(profit) else sign * profit)
        return profits

    long_returns = {}
    short_returns = {}
    ls_returns = {}

    for pdate, date in zip(predict_dates, trade_dates):
        score = pred.loc(axis=0)[pdate, :]
        score = score.reset_index().sort_values(by="score", ascending=False)

        long_stocks = list(score.iloc[:topk]["instrument"])
        short_stocks = list(score.iloc[-topk:]["instrument"])

        score = score.set_index(["datetime", "instrument"]).sort_index()
        # The index is <datetime, instrument>, so the stock ids must be read from the
        # "instrument" level; level 0 would yield the prediction date instead.
        all_stocks = list(score.loc(axis=0)[pdate, :].index.get_level_values(level="instrument"))

        long_profit = _collect_profits(long_stocks, date, _is_untradable)
        short_profit = _collect_profits(short_stocks, date, _is_untradable, sign=-1)
        # exclude the suspend stock
        all_profit = _collect_profits(all_stocks, date, trade_exchange.check_stock_suspended)

        # NOTE: np.mean of an empty list is NaN, so a date without any usable stock yields NaN.
        long_returns[date] = np.mean(long_profit) - np.mean(all_profit)
        short_returns[date] = np.mean(short_profit) + np.mean(all_profit)
        ls_returns[date] = np.mean(short_profit) + np.mean(long_profit)

    return dict(
        zip(
            ["long", "short", "long_short"],
            map(pd.Series, [long_returns, short_returns, ls_returns]),
        )
    )
|
|
|
|
|
|
def t_run():
    """Smoke-run the legacy `backtest` wrapper on predictions read from ./check_pred.csv.

    The CSV's first two columns become the index, only the first 9000 rows are kept,
    and a few pieces of the result are printed. Always returns 0.
    """
    frame = pd.read_csv("./check_pred.csv")
    frame["datetime"] = pd.to_datetime(frame["datetime"])

    index_columns = [frame.columns[0], frame.columns[1]]
    frame = frame.set_index(index_columns).iloc[:9000]

    report_df, positions = backtest(pred=frame)
    print(report_df.head())
    print(positions.keys())
    first_key = list(positions.keys())[0]
    print(positions[first_key])
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the smoke test only when this module is executed as a script.
    t_run()
|