mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
338 lines
8.4 KiB
Plaintext
338 lines
8.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"import qlib\n",
|
|
"import pandas as pd\n",
|
|
"from qlib.config import REG_CN\n",
|
|
"from qlib.contrib.model.gbdt import LGBModel\n",
|
|
"from qlib.contrib.estimator.handler import Alpha158\n",
|
|
"from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
|
|
"from qlib.contrib.evaluate import (\n",
|
|
" backtest as normal_backtest,\n",
|
|
" risk_analysis,\n",
|
|
")\n",
|
|
"from qlib.utils import exists_qlib_data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# use default data\n",
|
|
"# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn\n",
|
|
"provider_uri = \"~/.qlib/qlib_data/cn_data\" # target_dir\n",
|
|
"if not exists_qlib_data(provider_uri):\n",
|
|
" print(f\"Qlib data is not found in {provider_uri}\")\n",
|
|
" sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
|
|
" from get_data import GetData\n",
|
|
" GetData().qlib_data(target_dir=provider_uri)\n",
|
|
"qlib.init(provider_uri=provider_uri, region=REG_CN)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"MARKET = \"csi300\"\n",
|
|
"BENCHMARK = \"SH000300\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# train model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"###################################\n",
|
|
"# train model\n",
|
|
"###################################\n",
|
|
"DATA_HANDLER_CONFIG = {\n",
|
|
" \"dropna_label\": True,\n",
|
|
" \"start_date\": \"2008-01-01\",\n",
|
|
" \"end_date\": \"2020-08-01\",\n",
|
|
" \"market\": MARKET,\n",
|
|
"}\n",
|
|
"\n",
|
|
"TRAINER_CONFIG = {\n",
|
|
" \"train_start_date\": \"2008-01-01\",\n",
|
|
" \"train_end_date\": \"2014-12-31\",\n",
|
|
" \"validate_start_date\": \"2015-01-01\",\n",
|
|
" \"validate_end_date\": \"2016-12-31\",\n",
|
|
" \"test_start_date\": \"2017-01-01\",\n",
|
|
" \"test_end_date\": \"2020-08-01\",\n",
|
|
"}\n",
|
|
"\n",
|
|
"# use default DataHandler\n",
|
|
"# custom DataHandler, refer to: TODO: DataHandler api url\n",
|
|
"x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(**TRAINER_CONFIG)\n",
|
|
"\n",
|
|
"\n",
|
|
"MODEL_CONFIG = {\n",
|
|
" \"loss\": \"mse\",\n",
|
|
" \"colsample_bytree\": 0.8879,\n",
|
|
" \"learning_rate\": 0.0421,\n",
|
|
" \"subsample\": 0.8789,\n",
|
|
" \"lambda_l1\": 205.6999,\n",
|
|
" \"lambda_l2\": 580.9768,\n",
|
|
" \"max_depth\": 8,\n",
|
|
" \"num_leaves\": 210,\n",
|
|
" \"num_threads\": 20,\n",
|
|
"}\n",
|
|
"# use default model\n",
|
|
"# custom Model, refer to: TODO: Model api url\n",
|
|
"model = LGBModel(**MODEL_CONFIG)\n",
|
|
"model.fit(x_train, y_train, x_validate, y_validate)\n",
|
|
"_pred = model.predict(x_test)\n",
|
|
"_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)\n",
|
|
"\n",
|
|
"# backtest requires pred_score\n",
|
|
"pred_score = pd.DataFrame(index=_pred.index)\n",
|
|
"pred_score[\"score\"] = _pred.iloc(axis=1)[0]\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# backtest"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"###################################\n",
|
|
"# backtest\n",
|
|
"###################################\n",
|
|
"STRATEGY_CONFIG = {\n",
|
|
" \"topk\": 50,\n",
|
|
" \"n_drop\": 5}\n",
|
|
"BACKTEST_CONFIG = {\n",
|
|
" \"verbose\": False,\n",
|
|
" \"limit_threshold\": 0.095,\n",
|
|
" \"account\": 100000000,\n",
|
|
" \"benchmark\": BENCHMARK,\n",
|
|
" \"deal_price\": \"close\",\n",
|
|
" \"open_cost\": 0.0005,\n",
|
|
" \"close_cost\": 0.0015,\n",
|
|
" \"min_cost\": 5,\n",
|
|
" \n",
|
|
"}\n",
|
|
"\n",
|
|
"# use default strategy\n",
|
|
"# custom Strategy, refer to: TODO: Strategy api url\n",
|
|
"strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)\n",
|
|
"report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# analyze"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"###################################\n",
|
|
"# analyze\n",
|
|
"# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb\n",
|
|
"###################################\n",
|
|
"analysis = dict()\n",
|
|
"analysis[\"excess_return_without_cost\"] = risk_analysis(report_normal[\"return\"] - report_normal[\"bench\"])\n",
|
|
"analysis[\"excess_return_with_cost\"] = risk_analysis(\n",
|
|
" report_normal[\"return\"] - report_normal[\"bench\"] - report_normal[\"cost\"]\n",
|
|
")\n",
|
|
"analysis_df = pd.concat(analysis) # type: pd.DataFrame\n",
|
|
"print(analysis_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# analyze graphs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from qlib.contrib.report import analysis_model, analysis_position\n",
|
|
"from qlib.data import D\n",
|
|
"pred_df_dates = pred_score.index.get_level_values(level='datetime')\n",
|
|
"report_normal_df = report_normal\n",
|
|
"positions = positions_normal\n",
|
|
"pred_df = pred_score"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## analysis position"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
|
|
"stock_ret.columns = ['label']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### report"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"analysis_position.report_graph(report_normal_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### risk analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## analysis model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
|
|
"label_df.columns = ['label']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### score IC"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
|
|
"analysis_position.score_ic_graph(pred_label)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### model performance"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"analysis_model.model_performance_graph(pred_label)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3"
|
|
},
|
|
"toc": {
|
|
"base_numbering": 1,
|
|
"nav_menu": {},
|
|
"number_sections": true,
|
|
"sideBar": true,
|
|
"skip_h1_title": false,
|
|
"title_cell": "Table of Contents",
|
|
"title_sidebar": "Contents",
|
|
"toc_cell": false,
|
|
"toc_position": {},
|
|
"toc_section_display": true,
|
|
"toc_window_display": false
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
} |