qlib/examples/estimator/analyze_from_estimator.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import json\n",
    "import yaml\n",
    "import pickle\n",
    "from pathlib import Path\n",
    "\n",
    "import qlib\n",
    "import pandas as pd\n",
    "from qlib.config import REG_CN\n",
    "from qlib.utils import exists_qlib_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CUR_DIR = Path.cwd()\n",
    "MARKET = \"csi300\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# use default data\n",
    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data\n",
    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
    "if not exists_qlib_data(provider_uri):\n",
    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
    "    sys.path.append(str(CUR_DIR.parent.parent.joinpath(\"scripts\")))\n",
    "    from get_data import GetData\n",
    "    GetData().qlib_data_cn(provider_uri)\n",
    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with CUR_DIR.joinpath('estimator_config.yaml').open() as fp:\n",
    "    estimator_name = yaml.load(fp, Loader=yaml.FullLoader)['experiment']['name']\n",
    "with CUR_DIR.joinpath(estimator_name, 'exp_info.json').open() as fp:\n",
    "    latest_id = json.load(fp)['id']\n",
    "    \n",
    "estimator_dir = CUR_DIR.joinpath(estimator_name, 'sacred', latest_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# read estimator result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pred_df = pd.read_pickle(estimator_dir.joinpath('pred.pkl'))\n",
    "report_normal_df = pd.read_pickle(estimator_dir.joinpath('report_normal.pkl'))\n",
    "report_normal_df.index.names = ['index']\n",
    "\n",
    "analysis_df = pd.read_pickle(estimator_dir.joinpath('analysis.pkl'))\n",
    "positions = pickle.load(estimator_dir.joinpath('positions.pkl').open('rb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# analyze graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from qlib.data import D\n",
    "from qlib.contrib.report import analysis_model, analysis_position\n",
    "pred_df_dates = pred_df.index.get_level_values(level='datetime')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## analysis position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stock_ret = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
    "stock_ret.columns = ['label']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_position.report_graph(report_normal_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### risk analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## analysis model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_df = D.features(D.instruments(MARKET), ['Ref($close, -2)/Ref($close, -1) - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
    "label_df.columns = ['label']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### score IC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)\n",
    "analysis_position.score_ic_graph(pred_label)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### model performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_model.model_performance_graph(pred_label)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}