init commit

2026-07-05 20:11:08 +08:00 · 2020-09-22 01:43:21 +00:00
parent aa51e5aad3
commit 99ebd87cba
131 changed files with 20218 additions and 0 deletions
--- a/examples/estimator/analyze_from_estimator.ipynb
+++ b/examples/estimator/analyze_from_estimator.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import json\n",
+    "import yaml\n",
+    "import pickle\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import qlib\n",
+    "import pandas as pd\n",
+    "from qlib.config import REG_CN\n",
+    "from qlib.utils import exists_qlib_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CUR_DIR = Path.cwd()\n",
+    "MARKET = \"csi300\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# use default data\n",
+    "# NOTE: need to download data from remote: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data\n",
+    "provider_uri = \"~/.qlib/qlib_data/cn_data\"  # target_dir\n",
+    "if not exists_qlib_data(provider_uri):\n",
+    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
+    "    sys.path.append(str(CUR_DIR.parent.parent.joinpath(\"scripts\")))\n",
+    "    from get_data import GetData\n",
+    "    GetData().qlib_data_cn(provider_uri)\n",
+    "qlib.init(provider_uri=provider_uri, region=REG_CN)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Locate the sacred output directory of the run id recorded in exp_info.json\n",
+    "with CUR_DIR.joinpath('estimator_config.yaml').open() as fp:\n",
+    "    estimator_name = yaml.safe_load(fp)['experiment']['name']  # plain config: safe_load is sufficient\n",
+    "with CUR_DIR.joinpath(estimator_name, 'exp_info.json').open() as fp:\n",
+    "    latest_id = json.load(fp)['id']\n",
+    "estimator_dir = CUR_DIR.joinpath(estimator_name, 'sacred', str(latest_id))  # str(): the JSON id may be numeric"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# read estimator result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_df = pd.read_pickle(estimator_dir.joinpath('pred.pkl'))\n",
+    "report_normal_df = pd.read_pickle(estimator_dir.joinpath('report_normal.pkl'))\n",
+    "report_normal_df.index.names = ['index']\n",
+    "# NOTE: unpickling can execute arbitrary code; only load artifacts written by your own estimator run\n",
+    "analysis_df = pd.read_pickle(estimator_dir.joinpath('analysis.pkl'))\n",
+    "positions = pickle.loads(estimator_dir.joinpath('positions.pkl').read_bytes())  # read_bytes() closes the file"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# get label data from qlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from qlib.data import D\n",
+    "pred_df_dates = pred_df.index.get_level_values(level='datetime')\n",
+    "features_df = D.features(D.instruments(MARKET), ['Ref($close, -1)/$close - 1'], pred_df_dates.min(), pred_df_dates.max())\n",
+    "features_df.columns = ['label']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# analyze graphs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from qlib.contrib.report import analysis_model, analysis_position"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## analysis position"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "analysis_position.report_graph(report_normal_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### score IC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_label = pd.concat([features_df, pred_df], axis=1, sort=True).reindex(features_df.index)\n",
+    "analysis_position.score_ic_graph(pred_label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### cumulative return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_position.cumulative_return_graph(positions, report_normal_df, features_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### risk analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_position.risk_analysis_graph(analysis_df, report_normal_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### rank label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "analysis_position.rank_label_graph(positions, features_df, pred_df_dates.min(), pred_df_dates.max())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## analysis model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### model performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "analysis_model.model_performance_graph(pred_label)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/examples/estimator/estimator_config.yaml
+++ b/examples/estimator/estimator_config.yaml
@@ -0,0 +1,55 @@
+experiment:
+  name: estimator_example  # analyze_from_estimator.ipynb reads this name to locate the result directory
+  observer_type: file_storage
+  mode: train
+
+model:
+  class: LGBModel
+  module_path: qlib.contrib.model.gbdt
+  args:
+    loss: mse
+    colsample_bytree: 0.8879
+    learning_rate: 0.0421
+    subsample: 0.8789
+    lambda_l1: 205.6999
+    lambda_l2: 580.9768
+    max_depth: 8
+    num_leaves: 64
+    num_threads: 20
+    min_data_in_leaf: 10
+data:
+  class: QLibDataHandlerClose
+  args:
+    dropna_label: True
+  filter:
+    market: csi300
+trainer:
+  class: StaticTrainer
+  args:
+    train_start_date: 2008-01-01
+    train_end_date: 2014-12-31
+    validate_start_date: 2015-01-01
+    validate_end_date: 2016-12-31
+    test_start_date: 2017-01-01
+    test_end_date: 2020-08-01
+strategy:
+  class: TopkDropoutStrategy
+  args:
+    topk: 50
+    n_drop: 5
+backtest:
+  normal_backtest_args:
+    verbose: False
+    limit_threshold: 0.095
+    account: 100000000
+    benchmark: SH000300
+    deal_price: close
+    open_cost: 0.0005
+    close_cost: 0.0015
+    min_cost: 5
+
+qlib_data:
+  # Adjust the following parameters to match your own environment before running or testing.
+  provider_uri: "~/.qlib/qlib_data/cn_data"
+  region: "cn"
+  redis_port: 4312
--- a/examples/estimator/estimator_config_dnn.yaml
+++ b/examples/estimator/estimator_config_dnn.yaml
@@ -0,0 +1,57 @@
+experiment:
+  name: estimator_example
+  observer_type: file_storage
+  mode: train
+
+model:
+  module_path: qlib.contrib.model.pytorch_nn
+  class: DNNModelPytorch
+  args:
+    loss: mse
+    input_dim: 158
+    output_dim: 1
+    lr: 0.002
+    lr_decay: 0.96
+    lr_decay_steps: 100
+    optimizer: 'adam'
+    max_steps: 8000
+    batch_size: 4096
+    GPU: '0'  # quoted so the value is parsed as a string
+data:
+  class: QLibDataHandlerClose
+  args:
+    dropna_label: True
+    dropna_feature: True
+  filter:
+    market: csi300
+trainer:
+  class: StaticTrainer
+  args:
+    train_start_date: 2007-01-01
+    train_end_date: 2014-12-31
+    validate_start_date: 2015-01-01
+    validate_end_date: 2016-12-31
+    test_start_date: 2017-01-01
+    test_end_date: 2020-08-01
+strategy:
+  class: TopkDropoutStrategy
+  args:
+    topk: 50
+    n_drop: 5
+backtest:
+  normal_backtest_args:
+    verbose: False
+    limit_threshold: 0.095
+    account: 100000000
+    benchmark: SH000300
+    deal_price: close
+    open_cost: 0.0005
+    close_cost: 0.0015
+    min_cost: 5
+  long_short_backtest_args:
+    topk: 50
+
+qlib_data:
+  # Adjust the following parameters to match your own environment before running or testing.
+  provider_uri: "~/.qlib/qlib_data/cn_data"
+  region: "cn"