init commit

2026-07-06 04:20:57 +08:00 · 2020-09-22 01:43:21 +00:00
parent aa51e5aad3
commit 99ebd87cba
131 changed files with 20218 additions and 0 deletions
--- a/examples/train_backtest_analyze.ipynb
+++ b/examples/train_backtest_analyze.ipynb
@@ -0,0 +1,355 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import qlib\n",
+    "import pandas as pd\n",
+    "from qlib.config import REG_CN\n",
+    "from qlib.contrib.model.gbdt import LGBModel\n",
+    "from qlib.contrib.estimator.handler import QLibDataHandlerClose\n",
+    "from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
+    "from qlib.contrib.evaluate import (\n",
+    "    backtest as normal_backtest,\n",
+    "    risk_analysis,\n",
+    ")\n",
+    "from qlib.utils import exists_qlib_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# use default data\n",
+    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data\n",
+    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
+    "if not exists_qlib_data(provider_uri):\n",
+    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
+    "    sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
+    "    from get_data import GetData\n",
+    "    GetData().qlib_data_cn(provider_uri)\n",
+    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MARKET = \"csi300\"\n",
+    "BENCHMARK = \"SH000300\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# train model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "###################################\n",
+    "# train model\n",
+    "###################################\n",
+    "DATA_HANDLER_CONFIG = {\n",
+    "    \"dropna_label\": True,\n",
+    "    \"start_date\": \"2008-01-01\",\n",
+    "    \"end_date\": \"2020-08-01\",\n",
+    "    \"market\": MARKET,\n",
+    "}\n",
+    "\n",
+    "TRAINER_CONFIG = {\n",
+    "    \"train_start_date\": \"2008-01-01\",\n",
+    "    \"train_end_date\": \"2014-12-31\",\n",
+    "    \"validate_start_date\": \"2015-01-01\",\n",
+    "    \"validate_end_date\": \"2016-12-31\",\n",
+    "    \"test_start_date\": \"2017-01-01\",\n",
+    "    \"test_end_date\": \"2020-08-01\",\n",
+    "}\n",
+    "\n",
+    "# use default DataHandler\n",
+    "# custom DataHandler, refer to: TODO: DataHandler api url\n",
+    "x_train, y_train, x_validate, y_validate, x_test, y_test = QLibDataHandlerClose(**DATA_HANDLER_CONFIG).get_split_data(**TRAINER_CONFIG)\n",
+    "\n",
+    "\n",
+    "MODEL_CONFIG = {\n",
+    "    \"loss\": \"mse\",\n",
+    "    \"colsample_bytree\": 0.8879,\n",
+    "    \"learning_rate\": 0.0421,\n",
+    "    \"subsample\": 0.8789,\n",
+    "    \"lambda_l1\": 205.6999,\n",
+    "    \"lambda_l2\": 580.9768,\n",
+    "    \"max_depth\": 8,\n",
+    "    \"num_leaves\": 210,\n",
+    "    \"num_threads\": 20,\n",
+    "}\n",
+    "# use default model\n",
+    "# custom Model, refer to: TODO: Model api url\n",
+    "model = LGBModel(**MODEL_CONFIG)\n",
+    "model.fit(x_train, y_train, x_validate, y_validate)\n",
+    "_pred = model.predict(x_test)\n",
+    "_pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)\n",
+    "\n",
+    "# backtest requires pred_score\n",
+    "pred_score = pd.DataFrame(index=_pred.index)\n",
+    "pred_score[\"score\"] = _pred.iloc(axis=1)[0]\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# backtest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "###################################\n",
+    "# backtest\n",
+    "###################################\n",
+    "STRATEGY_CONFIG = {\n",
+    "    \"topk\": 50,\n",
+    "    \"n_drop\": 5",
+    "}\n",
+    "BACKTEST_CONFIG = {\n",
+    "    \"verbose\": False,\n",
+    "    \"limit_threshold\": 0.095,\n",
+    "    \"account\": 100000000,\n",
+    "    \"benchmark\": BENCHMARK,\n",
+    "    \"deal_price\": \"close\",\n",
+    "    \"open_cost\": 0.0005,\n",
+    "    \"close_cost\": 0.0015,\n",
+    "    \"min_cost\": 5,\n",
+    "    \n",
+    "}\n",
+    "\n",
+    "# use default strategy\n",
+    "# custom Strategy, refer to: TODO: Strategy api url\n",
+    "strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)\n",
+    "report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# analyze"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "###################################\n",
+    "# analyze\n",
+    "# If need a more detailed analysis, refer to: examples/train_and_bakctest.ipynb\n",
+    "###################################\n",
+    "analysis = dict()\n",
+    "analysis[\"sub_bench\"] = risk_analysis(report_normal[\"return\"] - report_normal[\"bench\"])\n",
+    "analysis[\"sub_cost\"] = risk_analysis(\n",
+    "    report_normal[\"return\"] - report_normal[\"bench\"] - report_normal[\"cost\"]\n",
+    ")\n",
+    "analysis_df = pd.concat(analysis)  # type: pd.DataFrame\n",
+    "print(analysis_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# analyze graphs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from qlib.contrib.report import analysis_model, analysis_position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get label data\n",
+    "from qlib.data import D\n",
+    "pred_df_dates = pred_score.index.get_level_values(level='datetime')\n",
+    "features_df = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
+    "features_df.columns = ['label']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## analysis position"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "analysis_position.report_graph(report_normal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### score IC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_label = pd.concat([features_df, pred_score], axis=1, sort=True).reindex(features_df.index)\n",
+    "analysis_position.score_ic_graph(pred_label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### cumulative return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_position.cumulative_return_graph(positions_normal, report_normal, features_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### risk analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_position.risk_analysis_graph(analysis_df, report_normal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### rank label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "analysis_position.rank_label_graph(positions_normal, features_df, pred_df_dates.min(), pred_df_dates.max())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## analysis model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### model performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_model.model_performance_graph(pred_label)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}