diff --git a/qlib/portfolio/optimizer.py b/qlib/portfolio/optimizer.py index 75c6c51f5..6ee396a51 100644 --- a/qlib/portfolio/optimizer.py +++ b/qlib/portfolio/optimizer.py @@ -291,7 +291,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer): lamb: float = 10, delta: float = 0.4, bench_dev: float = 0.01, - inds_dev: float = 0.01, + inds_dev: float = None, scale_alpha: bool = True, verbose: bool = False, warm_start: str = DO_NOT_START_FROM, @@ -302,7 +302,8 @@ class EnhancedIndexingOptimizer(BaseOptimizer): lamb (float): risk aversion parameter (larger `lamb` means less focus on return) delta (float): turnover rate limit bench_dev (float): benchmark deviation limit - inds_dev (float): industry deviation limit + inds_dev (float/None): industry deviation limit, set `inds_dev` to None to ignore industry specific + restriction scale_alpha (bool): if to scale alpha to match the volatility of the covariance matrix verbose (bool): if print detailed information about the solver warm_start (str): whether try to warm start (`w0`/`benchmark`/``) @@ -341,7 +342,7 @@ class EnhancedIndexingOptimizer(BaseOptimizer): varU: np.ndarray, w0: np.ndarray, w_bench: np.ndarray, - inds_onehot: np.ndarray, + inds_onehot: np.ndarray = None, ) -> Union[np.ndarray, pd.Series]: """ Args: @@ -354,6 +355,8 @@ class EnhancedIndexingOptimizer(BaseOptimizer): Returns: np.ndarray or pd.Series: optimized portfolio allocation """ + assert inds_onehot is not None or self.inds_dev is None, "Industry onehot vector is required." 
+ # scale alpha to match volatility if self.scale_alpha: u = u / u.std() @@ -366,15 +369,18 @@ class EnhancedIndexingOptimizer(BaseOptimizer): risk = cp.quad_form(v, covB) + cp.sum(cp.multiply(varU, w ** 2)) obj = cp.Maximize(ret - self.lamb * risk) d_bench = w - w_bench - d_inds = d_bench @ inds_onehot cons = [ w >= 0, cp.sum(w) == 1, d_bench >= -self.bench_dev, d_bench <= self.bench_dev, - d_inds >= -self.inds_dev, - d_inds <= self.inds_dev, ] + + if self.inds_dev is not None: + d_inds = d_bench @ inds_onehot + cons.append(d_inds >= -self.inds_dev) + cons.append(d_inds <= self.inds_dev) + if w0 is not None: turnover = cp.sum(cp.abs(w - w0)) cons.append(turnover <= self.delta) diff --git a/tests/test_enhanced_indexing.py b/tests/test_enhanced_indexing.py new file mode 100644 index 000000000..f6e77cba4 --- /dev/null +++ b/tests/test_enhanced_indexing.py @@ -0,0 +1,194 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import sys +import shutil +import unittest +from pathlib import Path + +import numpy as np +import pandas as pd + +import qlib +from qlib.config import REG_CN, C +from qlib.utils import drop_nan_by_y_index +from qlib.contrib.model.gbdt import LGBModel +from qlib.contrib.data.handler import Alpha158 +from qlib.contrib.strategy.strategy import TopkDropoutStrategy +from qlib.contrib.evaluate import ( + backtest as normal_backtest, + risk_analysis, +) +from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.workflow import R +from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord +from qlib.tests.data import GetData +from qlib.tests import TestAutoData + + +market = "csi300" +benchmark = "SH000300" + +################################### +# train model +################################### +data_handler_config = { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, +} + 
# Model and dataset configuration. `train()` instantiates both entries through
# `init_instance_by_config` and logs the flattened dict as experiment params.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
}

# Strategy and backtest settings handed to `PortAnaRecord` in
# `backtest_analysis()`.
port_analysis_config = {
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.strategy",
        "kwargs": {
            "topk": 50,
            "n_drop": 5,
        },
    },
    "backtest": {
        "verbose": False,
        "limit_threshold": 0.095,
        "account": 100000000,
        "benchmark": benchmark,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}


# train
def train():
    """Train the model and compute its signal-analysis metrics.

    Returns
    -------
    pred_score: pandas.DataFrame
        predict scores, as loaded from the ``SignalRecord``
    performance: dict
        model performance; key ``"ic"`` holds the object loaded from
        ``ic.pkl`` and key ``"ric"`` the object loaded from ``ric.pkl``
    rid: str
        id of the recorder that was active during training; pass it to
        ``backtest_analysis`` to reuse the same recorder
    """

    # model initialization
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # start exp
    # NOTE(review): the extent of this `with` body is reconstructed; the
    # argument-less `R.get_recorder()` below presumably needs the experiment
    # to still be active -- confirm against the upstream file.
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        # prediction
        recorder = R.get_recorder()
        rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()
        pred_score = sr.load()

        # calculate ic and ric
        sar = SigAnaRecord(recorder)
        sar.generate()
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))

    return pred_score, {"ic": ic, "ric": ric}, rid


def backtest_analysis(pred, rid):
    """Run the portfolio backtest on a finished recorder and return its analysis.

    Parameters
    ----------
    pred : pandas.DataFrame
        predict scores; accepted for interface compatibility but not read by
        this function (the recorder identified by ``rid`` is used instead)
    rid : str
        the id of the recorder to be used in this function

    Returns
    -------
    analysis : pandas.DataFrame
        the analysis result loaded from ``port_analysis.pkl``

    """
    recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)
    # backtest
    par = PortAnaRecord(recorder, port_analysis_config)
    par.generate()
    analysis_df = par.load(par.get_path("port_analysis.pkl"))
    print(analysis_df)
    return analysis_df


def _strip_file_scheme(uri):
    """Return ``uri`` without a leading ``"file:"`` marker, if it has one."""
    prefix = "file:"
    # `str.strip("file:")` would treat the argument as a *set of characters*
    # and also eat matching letters from the end of the path; remove the
    # prefix explicitly instead.
    if uri.startswith(prefix):
        return uri[len(prefix):]
    return uri


class TestAllFlow(TestAutoData):
    """End-to-end check: train a model, then backtest its predictions.

    ``test_1_backtest`` consumes the class attributes that ``test_0_train``
    fills in, so the two tests must run in that order (``suite()`` adds them
    in that order).
    """

    # Shared state passed from test_0_train to test_1_backtest.
    PRED_SCORE = None
    REPORT_NORMAL = None
    POSITIONS = None
    RID = None

    @classmethod
    def tearDownClass(cls) -> None:
        """Delete the experiment directory configured in ``C["exp_manager"]``."""
        exp_uri = C["exp_manager"]["kwargs"]["uri"]
        exp_dir = Path(_strip_file_scheme(exp_uri)).resolve()
        shutil.rmtree(str(exp_dir))

    def test_0_train(self):
        """Train the model and stash predictions and recorder id on the class."""
        TestAllFlow.PRED_SCORE, ic_ric, TestAllFlow.RID = train()
        # NOTE(review): `.all()` reduces the loaded object to a truth value
        # before the comparison, so `>= 0` holds for both True and False
        # (assuming `ic`/`ric` are pandas Series -- confirm). These two
        # assertions therefore cannot fail on the metric values; decide on the
        # intended check (e.g. on the mean) before tightening them.
        self.assertGreaterEqual(ic_ric["ic"].all(), 0, "train failed")
        self.assertGreaterEqual(ic_ric["ric"].all(), 0, "train failed")

    def test_1_backtest(self):
        """Backtest the stored predictions and require >= 10% annualized excess return."""
        analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
        self.assertGreaterEqual(
            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
            0.10,
            "backtest failed",
        )


def suite():
    """Build a suite that runs training before the backtest."""
    _suite = unittest.TestSuite()
    _suite.addTest(TestAllFlow("test_0_train"))
    _suite.addTest(TestAllFlow("test_1_backtest"))
    return _suite


if __name__ == "__main__":
    runner = unittest.TextTestRunner()
    runner.run(suite())