mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-02 10:31:00 +08:00
Allow enhanced indexing to generate a portfolio without industry-related restrictions.
This commit is contained in:
@@ -291,7 +291,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer):
|
||||
lamb: float = 10,
|
||||
delta: float = 0.4,
|
||||
bench_dev: float = 0.01,
|
||||
inds_dev: float = 0.01,
|
||||
inds_dev: float = None,
|
||||
scale_alpha: bool = True,
|
||||
verbose: bool = False,
|
||||
warm_start: str = DO_NOT_START_FROM,
|
||||
@@ -302,7 +302,8 @@ class EnhancedIndexingOptimizer(BaseOptimizer):
|
||||
lamb (float): risk aversion parameter (larger `lamb` means less focus on return)
|
||||
delta (float): turnover rate limit
|
||||
bench_dev (float): benchmark deviation limit
|
||||
inds_dev (float): industry deviation limit
|
||||
inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific
|
||||
restriction
|
||||
scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix
|
||||
verbose (bool): if print detailed information about the solver
|
||||
warm_start (str): whether try to warm start (`w0`/`benchmark`/``)
|
||||
@@ -341,7 +342,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer):
|
||||
varU: np.ndarray,
|
||||
w0: np.ndarray,
|
||||
w_bench: np.ndarray,
|
||||
inds_onehot: np.ndarray,
|
||||
inds_onehot: np.ndarray = None,
|
||||
) -> Union[np.ndarray, pd.Series]:
|
||||
"""
|
||||
Args:
|
||||
@@ -354,6 +355,8 @@ class EnhancedIndexingOptimizer(BaseOptimizer):
|
||||
Returns:
|
||||
np.ndarray or pd.Series: optimized portfolio allocation
|
||||
"""
|
||||
assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required."
|
||||
|
||||
# scale alpha to match volatility
|
||||
if self.scale_alpha:
|
||||
u = u / u.std()
|
||||
@@ -366,15 +369,18 @@ class EnhancedIndexingOptimizer(BaseOptimizer):
|
||||
risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2))
|
||||
obj = cp.Maximize(ret - self.lamb * risk)
|
||||
d_bench = w - w_bench
|
||||
d_inds = d_bench @ inds_onehot
|
||||
cons = [
|
||||
w >= 0,
|
||||
cp.sum(w) == 1,
|
||||
d_bench >= -self.bench_dev,
|
||||
d_bench <= self.bench_dev,
|
||||
d_inds >= -self.inds_dev,
|
||||
d_inds <= self.inds_dev,
|
||||
]
|
||||
|
||||
if self.inds_dev is not None:
|
||||
d_inds = d_bench @ inds_onehot
|
||||
cons.append(d_inds >= -self.inds_dev)
|
||||
cons.append(d_inds <= self.inds_dev)
|
||||
|
||||
if w0 is not None:
|
||||
turnover = cp.sum(cp.abs(w - w0))
|
||||
cons.append(turnover <= self.delta)
|
||||
|
||||
194
tests/test_enhanced_indexing.py
Normal file
194
tests/test_enhanced_indexing.py
Normal file
@@ -0,0 +1,194 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import sys
|
||||
import shutil
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import qlib
|
||||
from qlib.config import REG_CN, C
|
||||
from qlib.utils import drop_nan_by_y_index
|
||||
from qlib.contrib.model.gbdt import LGBModel
|
||||
from qlib.contrib.data.handler import Alpha158
|
||||
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
|
||||
from qlib.contrib.evaluate import (
|
||||
backtest as normal_backtest,
|
||||
risk_analysis,
|
||||
)
|
||||
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
|
||||
from qlib.workflow import R
|
||||
from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord
|
||||
from qlib.tests.data import GetData
|
||||
from qlib.tests import TestAutoData
|
||||
|
||||
|
||||
# Instrument universe and benchmark index shared by the configs below.
market = "csi300"
benchmark = "SH000300"

###################################
# train model
###################################
# Keyword arguments for the Alpha158 data handler (referenced from
# task["dataset"] below).
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2020-08-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
}

# Model and dataset specs; each entry is a class/module_path/kwargs triple
# that is passed to `init_instance_by_config` in `train()`.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
}

# Strategy and backtest settings handed to `PortAnaRecord` in
# `backtest_analysis()`.
port_analysis_config = {
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.strategy",
        "kwargs": {
            "topk": 50,
            "n_drop": 5,
        },
    },
    "backtest": {
        "verbose": False,
        "limit_threshold": 0.095,
        "account": 100000000,
        "benchmark": benchmark,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}
|
||||
|
||||
|
||||
# train
|
||||
def train():
    """train model

    Builds the model and dataset from the module-level ``task`` config, fits
    the model inside a "workflow" experiment run, then generates the
    prediction signal and its IC / rank-IC analysis from that run's recorder.

    Returns
    -------
    pred_score: pandas.DataFrame
        predict scores
    performance: dict
        model performance, keyed by ``"ic"`` and ``"ric"``
    rid: str
        the id of the recorder used for this run
    """

    # model initialization
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        # prediction
        # NOTE(review): `R.get_recorder()` is called without arguments, so it
        # presumably resolves to the run opened by `R.start` above; the steps
        # below are therefore kept inside the `with` block - confirm.
        recorder = R.get_recorder()
        rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()
        pred_score = sr.load()

        # calculate ic and ric
        sar = SigAnaRecord(recorder)
        sar.generate()
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))

    return pred_score, {"ic": ic, "ric": ric}, rid
|
||||
|
||||
|
||||
def backtest_analysis(pred, rid):
    """backtest and analysis

    Looks up the recorder ``rid`` in the "workflow" experiment, runs the
    portfolio analysis described by the module-level ``port_analysis_config``
    on it, prints the resulting table and returns it.

    Parameters
    ----------
    pred : pandas.DataFrame
        predict scores; accepted for interface compatibility but not read by
        this function (only the recorder is passed to ``PortAnaRecord``)
    rid : str
        the id of the recorder to be used in this function

    Returns
    -------
    analysis : pandas.DataFrame
        the analysis result

    """
    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)

    # backtest
    par = PortAnaRecord(recorder, port_analysis_config)
    par.generate()
    analysis_df = par.load(par.get_path("port_analysis.pkl"))
    print(analysis_df)
    return analysis_df
|
||||
|
||||
|
||||
class TestAllFlow(TestAutoData):
    """End-to-end flow test: train a model, then backtest its predictions.

    ``test_0_train`` stores its outputs on the class so that
    ``test_1_backtest`` can reuse them; `suite()` adds the tests in that order.
    """

    # Written by test_0_train, read by test_1_backtest.
    PRED_SCORE = None
    # Not assigned or read anywhere else in this file.
    REPORT_NORMAL = None
    POSITIONS = None
    # Recorder id written by test_0_train, read by test_1_backtest.
    RID = None

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove the experiment directory named by the configured tracking URI."""
        uri = C["exp_manager"]["kwargs"]["uri"]
        # Drop only a leading "file:" scheme. `str.strip("file:")` would strip
        # any of the characters f/i/l/e/: from BOTH ends of the string, which
        # corrupts paths such as "file:/data/file" and could make `rmtree`
        # delete the wrong directory.
        scheme = "file:"
        if uri.startswith(scheme):
            uri = uri[len(scheme):]
        shutil.rmtree(str(Path(uri).resolve()))

    def test_0_train(self):
        """Train the model and cache the prediction scores and recorder id."""
        TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train()
        # NOTE(review): if `.all()` returns a bool (as it does for pandas and
        # NumPy objects), both checks compare True/False with 0 and can never
        # fail. Left unchanged because the intended threshold is not visible
        # here - confirm what was meant (e.g. a mean IC check).
        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")

    def test_1_backtest(self):
        """Backtest the cached predictions and require at least 10% annualized excess return after cost."""
        analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
        self.assertGreaterEqual(
            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
            0.10,
            "backtest failed",
        )
|
||||
|
||||
|
||||
def suite():
    """Build a test suite that runs training before the backtest.

    Returns
    -------
    unittest.TestSuite
        suite containing ``test_0_train`` followed by ``test_1_backtest``
    """
    _suite = unittest.TestSuite()
    # Order matters: the backtest reads the class attributes set by training.
    for test_name in ("test_0_train", "test_1_backtest"):
        _suite.addTest(TestAllFlow(test_name))
    return _suite
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the ordered suite directly when this file is executed as a script.
    runner = unittest.TextTestRunner()
    runner.run(suite())
|
||||
Reference in New Issue
Block a user