mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Add Risk Prediction Demo
This commit is contained in:
179
examples/workflow_by_code_lgb_risk_demo.py
Normal file
179
examples/workflow_by_code_lgb_risk_demo.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import qlib
|
||||
from qlib.config import REG_CN
|
||||
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
|
||||
from qlib.workflow import R
|
||||
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
|
||||
from qlib.data.dataset.handler import DataHandlerLP
|
||||
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
import math
|
||||
import pandas as pd
|
||||
from scipy.stats.stats import pearsonr
|
||||
import numpy as np
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# use default data
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||
if not exists_qlib_data(provider_uri):
|
||||
print(f"Qlib data is not found in {provider_uri}")
|
||||
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
|
||||
from get_data import GetData
|
||||
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
|
||||
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||
|
||||
market = "csi300"
|
||||
benchmark = "SH000300"
|
||||
|
||||
###################################
|
||||
# train model
|
||||
###################################
|
||||
data_handler_config = {
|
||||
"start_time": "2008-01-01",
|
||||
"end_time": "2020-08-01",
|
||||
"fit_start_time": "2008-01-01",
|
||||
"fit_end_time": "2014-12-31",
|
||||
"instruments": market,
|
||||
"infer_processors": [
|
||||
{"class": "ProcessInf", "kwargs": {}},
|
||||
{"class": "ZScoreNorm", "kwargs": {"fields_group": "feature"}},
|
||||
{"class": "Fillna", "kwargs": {}},
|
||||
],
|
||||
"learn_processors": [{
|
||||
"class": "DropnaLabel", },
|
||||
],
|
||||
"label": (["Ref(Min($low, 5), -4)/$close - 1"], ["LABEL0"]) # the period for risk prediction is 5 days
|
||||
}
|
||||
|
||||
task = {
|
||||
"model": {
|
||||
"class": "LGBModel",
|
||||
"module_path": "qlib.contrib.model.gbdt",
|
||||
"kwargs": {
|
||||
"loss": "mse",
|
||||
"colsample_bytree": 0.8999,
|
||||
"learning_rate": 0.02,
|
||||
"subsample": 0.7,
|
||||
"lambda_l1": 11.9668,
|
||||
"lambda_l2": 339.1301,
|
||||
"max_depth": 16,
|
||||
"num_leaves": 31,
|
||||
"num_threads": 20,
|
||||
},
|
||||
},
|
||||
"dataset": {
|
||||
"class": "DatasetH",
|
||||
"module_path": "qlib.data.dataset",
|
||||
"kwargs": {
|
||||
"handler": {
|
||||
"class": "Alpha360",
|
||||
"module_path": "qlib.contrib.data.handler",
|
||||
"kwargs": data_handler_config,
|
||||
},
|
||||
"segments": {
|
||||
"train": ("2008-01-01", "2014-12-31"),
|
||||
"valid": ("2015-01-01", "2016-12-31"),
|
||||
"test": ("2017-01-01", "2020-08-01"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
port_analysis_config = {
|
||||
"strategy": {
|
||||
"class": "TopkDropoutStrategy",
|
||||
"module_path": "qlib.contrib.strategy.strategy",
|
||||
"kwargs": {
|
||||
"topk": 50,
|
||||
"n_drop": 5,
|
||||
},
|
||||
},
|
||||
"backtest": {
|
||||
"verbose": False,
|
||||
"limit_threshold": 0.095,
|
||||
"account": 100000000,
|
||||
"benchmark": benchmark,
|
||||
"deal_price": "close",
|
||||
"open_cost": 0.0005,
|
||||
"close_cost": 0.0015,
|
||||
"min_cost": 5,
|
||||
"return_order": True,
|
||||
},
|
||||
}
|
||||
|
||||
# model initiaiton
|
||||
model = init_instance_by_config(task["model"])
|
||||
dataset = init_instance_by_config(task["dataset"])
|
||||
|
||||
# NOTE: This line is optional
|
||||
# It demonstrates that the dataset can be used standalone.
|
||||
example_df = dataset.prepare("train")
|
||||
print(example_df.head())
|
||||
|
||||
def heatmap(actual_risk, predicted_risk, step=0.02):
|
||||
"""
|
||||
plot the precision heatmap as a visualized evaluation for risk predition
|
||||
:param actual_risk: the LABEL0 of test samples
|
||||
:param predicted_risk: the predicted results of test samples
|
||||
:param step: the internal size of risk values on axis
|
||||
:return:
|
||||
"""
|
||||
num_step = math.ceil(-predicted_risk.min() / step)
|
||||
matrix = np.zeros((num_step, num_step), dtype=np.float)
|
||||
for pred_thresh in range(num_step):
|
||||
for act_thresh in range(num_step):
|
||||
actual_positive = actual_risk < -act_thresh*step
|
||||
predicted_alarm = predicted_risk < -pred_thresh*step
|
||||
num_alarm = predicted_alarm.sum()
|
||||
num_tp = (actual_positive & predicted_alarm).sum()
|
||||
matrix[pred_thresh, act_thresh] = num_tp / num_alarm
|
||||
axis_labels = ['{:.3f}'.format(-x * step) for x in range(num_step)]
|
||||
return matrix, axis_labels
|
||||
|
||||
# start exp
|
||||
with R.start(experiment_name="workflow"):
|
||||
R.log_params(**flatten_dict(task))
|
||||
model.fit(dataset)
|
||||
|
||||
# prediction
|
||||
actual_risk = dataset.prepare("test", col_set="label", data_key=DataHandlerLP.DK_I)['LABEL0']
|
||||
pred = model.predict(dataset)
|
||||
|
||||
result_df = pd.concat((actual_risk, pred), axis=1)
|
||||
result_df.columns = ['Actual Risk', 'Predicted Risk']
|
||||
result_df.dropna(inplace=True)
|
||||
actual_risk, predicted_risk = result_df.iloc[:, 0], result_df.iloc[:, 1]
|
||||
corr = pearsonr(actual_risk, predicted_risk)[0]
|
||||
print('The correlation between predicted risk and actual risk is: {:.6f}'.format(corr))
|
||||
|
||||
# visualized results
|
||||
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
||||
sns.histplot(actual_risk, ax=axes[0, 0])
|
||||
axes[0, 0].set_title('Market: {} Actual Risk'.format(market))
|
||||
axes[0, 0].grid()
|
||||
|
||||
sns.histplot(predicted_risk, ax=axes[0, 1])
|
||||
axes[0, 1].set_title('Feature: {} Predicted Risk'.format(task['dataset']['kwargs']['handler']['class']))
|
||||
axes[0, 1].grid()
|
||||
|
||||
sns.scatterplot(data=result_df, ax=axes[1, 0], x='Actual Risk', y='Predicted Risk', s=20)
|
||||
axes[1, 0].set_title('Market: {} Feature: {} Corr: {:.5f}'.format(
|
||||
market, task['dataset']['kwargs']['handler']['class'], corr))
|
||||
axes[1, 0].grid()
|
||||
|
||||
matrix, ax_labels = heatmap(actual_risk, predicted_risk)
|
||||
sns.heatmap(matrix, annot=True, fmt=".3f", xticklabels=ax_labels, yticklabels=ax_labels, ax=axes[1, 1],
|
||||
)
|
||||
axes[1, 1].set_xlabel('Predicted Alarm Threshold')
|
||||
axes[1, 1].set_ylabel('Actual Positive Threshold')
|
||||
axes[1, 1].set_title('Risk Prediction Precision Heatmap')
|
||||
plt.show()
|
||||
Reference in New Issue
Block a user