diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index f1e7437fa..c3d965d85 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -17,6 +17,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 | | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 | | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 | +| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 | ## Alpha158 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | @@ -32,6 +33,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 | | GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 | | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 | +| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 | - The selected 20 features are based on the feature importance of a lightgbm-based model. - The base model of DoubleEnsemble is LGBM. diff --git a/examples/highfreq/README.md b/examples/highfreq/README.md index 30c2e19db..c07d8a2a0 100644 --- a/examples/highfreq/README.md +++ b/examples/highfreq/README.md @@ -25,4 +25,11 @@ The example is given in `workflow.py`, users can run the code as follows. Run the example by running the following command: ```bash python workflow.py dump_and_load_dataset -``` \ No newline at end of file +``` + +## Benchmarks Performance +### Signal Test +Here are the results of signal test for benchmark models. We will keep updating benchmark models in future. +| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long precision| Short Precision | Long-Short Average Return | Long-Short Average Sharpe | +|---|---|---|---|---|---|---|---|---|---| +| LightGBM | Alpha158 | 0.3042±0.00 | 1.5372±0.00| 0.3117±0.00 | 1.6258±0.00 | 0.6720±0.00 | 0.6870±0.00 | 0.000769±0.00 | 1.0190±0.00 | diff --git a/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml new file mode 100644 index 000000000..45c59c670 --- /dev/null +++ b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml @@ -0,0 +1,65 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data_1min" + region: cn +market: &market 'csi300' +start_time: &start_time "2020-09-15 00:00:00" +end_time: &end_time "2021-01-18 16:00:00" +train_end_time: &train_end_time "2020-11-15 16:00:00" +valid_start_time: &valid_start_time "2020-11-16 00:00:00" +valid_end_time: &valid_end_time "2020-11-30 16:00:00" +test_start_time: &test_start_time "2020-12-01 00:00:00" +data_handler_config: &data_handler_config + start_time: *start_time + end_time: *end_time + fit_start_time: *start_time + fit_end_time: *train_end_time + instruments: *market + freq: '1min' + infer_processors: + - class: 'RobustZScoreNorm' + kwargs: + fields_group: 'feature' + clip_outlier: false + - class: "Fillna" + kwargs: + fields_group: 'feature' + learn_processors: + - class: 'DropnaLabel' + - class: 'CSRankNorm' + kwargs: + fields_group: 'label' + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +task: + model: + class: "HFLGBModel" + module_path: "qlib.contrib.model.highfreq_gdbt_model" + kwargs: + objective: 'binary' + metric: ['binary_logloss','auc'] + verbosity: -1 + learning_rate: 0.01 + max_depth: 8 + num_leaves: 150 + lambda_l1: 1.5 + lambda_l2: 1 + num_threads: 20 + dataset: + class: "DatasetH" + module_path: "qlib.data.dataset" + kwargs: + handler: + class: "Alpha158" + module_path: "qlib.contrib.data.handler" + kwargs: *data_handler_config + segments: + train: [*start_time, *train_end_time] + valid: [*train_end_time, *valid_end_time] + test: [*test_start_time, *end_time] + record: + - class: "SignalRecord" + module_path: "qlib.workflow.record_temp" + kwargs: {} + - class: "HFSignalRecord" + module_path: "qlib.workflow.record_temp" + kwargs: {} \ No newline at end of file diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index c68571853..fadef9d16 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -8,6 +8,59 @@ import pandas as pd from typing import Tuple +def calc_long_short_prec( + pred: pd.Series, label: pd.Series, date_col="datetime", quantile: float = 0.2, dropna=False, is_alpha=False +) -> Tuple[pd.Series, pd.Series]: + """ + calculate the precision for long and short operation + + + :param pred/label: index is **pd.MultiIndex**, index name is **[datetime, instruments]**; columns names is **[score]**. + + .. code-block:: python + score + datetime instrument + 2020-12-01 09:30:00 SH600068 0.553634 + SH600195 0.550017 + SH600276 0.540321 + SH600584 0.517297 + SH600715 0.544674 + label : + label + date_col : + date_col + + Returns + ------- + (pd.Series, pd.Series) + long precision and short precision in time level + """ + if is_alpha: + label = label - label.mean(level=date_col) + if int(1 / quantile) >= len(label.index.get_level_values(1).unique()): + raise ValueError("Need more instruments to calculate precision") + + df = pd.DataFrame({"pred": pred, "label": label}) + if dropna: + df.dropna(inplace=True) + + group = df.groupby(level=date_col) + + N = lambda x: int(len(x) * quantile) + # find the top/low quantile of prediction and treat them as long and short target + long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label).reset_index(level=0, drop=True) + short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label).reset_index(level=0, drop=True) + + groupll = long.groupby(date_col) + l_dom = groupll.apply(lambda x: x > 0) + l_c = groupll.count() + + groups = short.groupby(date_col) + s_dom = groups.apply(lambda x: x < 0) + s_c = groups.count() + return (l_dom.groupby(date_col).sum() / l_c), (s_dom.groupby(date_col).sum() / s_c) + + def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False) -> Tuple[pd.Series, pd.Series]: """calc_ic. diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py new file mode 100644 index 000000000..5a2eeb50a --- /dev/null +++ b/qlib/contrib/model/highfreq_gdbt_model.py @@ -0,0 +1,157 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import numpy as np +import pandas as pd +import lightgbm as lgb + +from qlib.model.base import ModelFT +from qlib.data.dataset import DatasetH +from qlib.data.dataset.handler import DataHandlerLP +import warnings + + +class HFLGBModel(ModelFT): + """LightGBM Model for high frequency prediction""" + + def __init__(self, loss="mse", **kwargs): + if loss not in {"mse", "binary"}: + raise NotImplementedError + self.params = {"objective": loss, "verbosity": -1} + self.params.update(kwargs) + self.model = None + + def _cal_signal_metrics(self, y_test, l_cut, r_cut): + """ + Calcaute the signal metrics by daily level + """ + up_pre, down_pre = [], [] + up_alpha_ll, down_alpha_ll = [], [] + for date in y_test.index.get_level_values(0).unique(): + df_res = y_test.loc[date].sort_values("pred") + if int(l_cut * len(df_res)) < 10: + warnings.warn("Warning: threhold is too low or instruments number is not enough") + continue + top = df_res.iloc[: int(l_cut * len(df_res))] + bottom = df_res.iloc[int(r_cut * len(df_res)) :] + + down_precision = len(top[top[top.columns[0]] < 0]) / (len(top)) + up_precision = len(bottom[bottom[top.columns[0]] > 0]) / (len(bottom)) + + down_alpha = top[top.columns[0]].mean() + up_alpha = bottom[bottom.columns[0]].mean() + + up_pre.append(up_precision) + down_pre.append(down_precision) + up_alpha_ll.append(up_alpha) + down_alpha_ll.append(down_alpha) + + return ( + np.array(up_pre).mean(), + np.array(down_pre).mean(), + np.array(up_alpha_ll).mean(), + np.array(down_alpha_ll).mean(), + ) + + def hf_signal_test(self, dataset: DatasetH, threhold=0.2): + """ + Test the sigal in high frequency test set + """ + if self.model == None: + raise ValueError("Model hasn't been trained yet") + df_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I) + df_test.dropna(inplace=True) + x_test, y_test = df_test["feature"], df_test["label"] + # Convert label into alpha + y_test[y_test.columns[0]] = y_test[y_test.columns[0]] - y_test[y_test.columns[0]].mean(level=0) + + res = pd.Series(self.model.predict(x_test.values), index=x_test.index) + y_test["pred"] = res + + up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1 - threhold) + print("===============================") + print("High frequency signal test") + print("===============================") + print("Test set precision: ") + print("Positive precision: {}, Negative precision: {}".format(up_p, down_p)) + print("Test Alpha Average in test set: ") + print("Positive average alpha: {}, Negative average alpha: {}".format(up_a, down_a)) + + def _prepare_data(self, dataset: DatasetH): + df_train, df_valid = dataset.prepare( + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L + ) + + x_train, y_train = df_train["feature"], df_train["label"] + x_valid, y_valid = df_train["feature"], df_valid["label"] + if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: + l_name = df_train["label"].columns[0] + # Convert label into alpha + df_train["label"][l_name] = df_train["label"][l_name] - df_train["label"][l_name].mean(level=0) + df_valid["label"][l_name] = df_valid["label"][l_name] - df_valid["label"][l_name].mean(level=0) + mapping_fn = lambda x: 0 if x < 0 else 1 + df_train["label_c"] = df_train["label"][l_name].apply(mapping_fn) + df_valid["label_c"] = df_valid["label"][l_name].apply(mapping_fn) + x_train, y_train = df_train["feature"], df_train["label_c"].values + x_valid, y_valid = df_valid["feature"], df_valid["label_c"].values + else: + raise ValueError("LightGBM doesn't support multi-label training") + + dtrain = lgb.Dataset(x_train.values, label=y_train) + dvalid = lgb.Dataset(x_valid.values, label=y_valid) + return dtrain, dvalid + + def fit( + self, + dataset: DatasetH, + num_boost_round=1000, + early_stopping_rounds=50, + verbose_eval=20, + evals_result=dict(), + **kwargs + ): + dtrain, dvalid = self._prepare_data(dataset) + self.model = lgb.train( + self.params, + dtrain, + num_boost_round=num_boost_round, + valid_sets=[dtrain, dvalid], + valid_names=["train", "valid"], + early_stopping_rounds=early_stopping_rounds, + verbose_eval=verbose_eval, + evals_result=evals_result, + **kwargs + ) + evals_result["train"] = list(evals_result["train"].values())[0] + evals_result["valid"] = list(evals_result["valid"].values())[0] + + def predict(self, dataset): + if self.model is None: + raise ValueError("model is not fitted yet!") + x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I) + return pd.Series(self.model.predict(x_test.values), index=x_test.index) + + def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20): + """ + finetune model + + Parameters + ---------- + dataset : DatasetH + dataset for finetuning + num_boost_round : int + number of round to finetune model + verbose_eval : int + verbose level + """ + # Based on existing model and finetune by train more rounds + dtrain, _ = self._prepare_data(dataset) + self.model = lgb.train( + self.params, + dtrain, + num_boost_round=num_boost_round, + init_model=self.model, + valid_sets=[dtrain], + valid_names=["train"], + verbose_eval=verbose_eval, + ) diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index ed8039ac8..dee327f64 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -13,7 +13,7 @@ from ..data.dataset.handler import DataHandlerLP from ..utils import init_instance_by_config, get_module_by_module_path from ..log import get_module_logger from ..utils import flatten_dict -from ..contrib.eva.alpha import calc_ic, calc_long_short_return +from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec from ..contrib.strategy.strategy import BaseStrategy logger = get_module_logger("workflow", "INFO") @@ -162,6 +162,60 @@ class SignalRecord(RecordTemp): return super().load(name) +class HFSignalRecord(SignalRecord): + """ + This is the Signal Analysis Record class that generates the analysis results such as IC and IR. This class inherits the ``RecordTemp`` class. + """ + + artifact_path = "hg_sig_analysis" + + def __init__(self, recorder, **kwargs): + super().__init__(recorder=recorder) + + def generate(self): + pred = self.load("pred.pkl") + raw_label = self.load("label.pkl") + long_pre, short_pre = calc_long_short_prec(pred.iloc[:, 0], raw_label.iloc[:, 0], is_alpha=True) + ic, ric = calc_ic(pred.iloc[:, 0], raw_label.iloc[:, 0]) + metrics = { + "IC": ic.mean(), + "ICIR": ic.mean() / ic.std(), + "Rank IC": ric.mean(), + "Rank ICIR": ric.mean() / ric.std(), + "Long precision": long_pre.mean(), + "Short precision": short_pre.mean(), + } + objects = {"ic.pkl": ic, "ric.pkl": ric} + objects.update({"long_pre.pkl": long_pre, "short_pre.pkl": short_pre}) + long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], raw_label.iloc[:, 0]) + metrics.update( + { + "Long-Short Average Return": long_short_r.mean(), + "Long-Short Average Sharpe": long_short_r.mean() / long_short_r.std(), + } + ) + objects.update( + { + "long_short_r.pkl": long_short_r, + "long_avg_r.pkl": long_avg_r, + } + ) + self.recorder.log_metrics(**metrics) + self.recorder.save_objects(**objects, artifact_path=self.get_path()) + pprint(metrics) + + def list(self): + paths = [ + self.get_path("ic.pkl"), + self.get_path("ric.pkl"), + self.get_path("long_pre.pkl"), + self.get_path("short_pre.pkl"), + self.get_path("long_short_r.pkl"), + self.get_path("long_avg_r.pkl"), + ] + return paths + + class SigAnaRecord(SignalRecord): """ This is the Signal Analysis Record class that generates the analysis results such as IC and IR. This class inherits the ``RecordTemp`` class.