mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-30 01:21:18 +08:00
437 lines
18 KiB
Plaintext
437 lines
18 KiB
Plaintext
{
|
|
"metadata": {
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.9-final"
|
|
},
|
|
"orig_nbformat": 2,
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2,
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"import copy\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"import qlib\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"from qlib.config import REG_CN\n",
|
|
"from qlib.contrib.model.gbdt import LGBModel\n",
|
|
"from qlib.contrib.data.handler import Alpha158\n",
|
|
"from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
|
|
"from qlib.contrib.evaluate import (\n",
|
|
" backtest as normal_backtest,\n",
|
|
" risk_analysis,\n",
|
|
")\n",
|
|
"from qlib.utils import exists_qlib_data, init_instance_by_config\n",
|
|
"from qlib.workflow import R\n",
|
|
"from qlib.workflow.record_temp import SignalRecord, PortAnaRecord\n",
|
|
"from qlib.utils import flatten_dict"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"[36502:MainThread](2020-11-27 16:26:57,240) INFO - qlib.Initialization - [__init__.py:41] - default_conf: client.\n",
|
|
"[36502:MainThread](2020-11-27 16:26:57,242) WARNING - qlib.Initialization - [__init__.py:57] - redis connection failed(host=127.0.0.1 port=6379), cache will not be used!\n",
|
|
"[36502:MainThread](2020-11-27 16:26:57,243) INFO - qlib.Initialization - [__init__.py:76] - qlib successfully initialized based on client settings.\n",
|
|
"[36502:MainThread](2020-11-27 16:26:57,244) INFO - qlib.Initialization - [__init__.py:79] - data_path=/home/dongzho/.qlib/qlib_data/cn_data\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# use default data\n",
|
|
"# NOTE: need to download data from remote: python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data\n",
|
|
"provider_uri = \"~/.qlib/qlib_data/cn_data\" # target_dir\n",
|
|
"if not exists_qlib_data(provider_uri):\n",
|
|
" print(f\"Qlib data is not found in {provider_uri}\")\n",
|
|
" sys.path.append(str(Path.cwd().parent.joinpath(\"scripts\")))\n",
|
|
" from get_data import GetData\n",
|
|
" GetData().qlib_data(target_dir=provider_uri, region=REG_CN)\n",
|
|
"qlib.init(provider_uri=provider_uri, region=REG_CN)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"market = \"csi300\"\n",
|
|
"benchmark = \"SH000300\""
|
|
]
|
|
},
|
|
{
|
|
"source": [
|
|
"## Model Training"
|
|
],
|
|
"cell_type": "markdown",
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"[36502:MainThread](2020-11-27 16:27:17,338) INFO - qlib.timer - [log.py:81] - Time cost: 19.994s | Loading data Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:18,164) INFO - qlib.timer - [log.py:81] - Time cost: 0.245s | DropnaLabel Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,086) INFO - qlib.timer - [log.py:81] - Time cost: 7.921s | CSZScoreNorm Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,087) INFO - qlib.timer - [log.py:81] - Time cost: 8.747s | fit & process data Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,088) INFO - qlib.timer - [log.py:81] - Time cost: 28.744s | Init data Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,097) INFO - qlib.workflow - [exp.py:180] - Experiment 2 starts running ...\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,221) INFO - qlib.workflow - [recorder.py:234] - Recorder 3fa4def1f6694119a3d336a7a06c88cb starts running under Experiment 2 ...\n",
|
|
"[36502:MainThread](2020-11-27 16:27:26,223) INFO - qlib.workflow - [expm.py:251] - No tracking URI is provided. The default tracking URI is set as `mlruns` under the working directory.\n",
|
|
"Training until validation scores don't improve for 50 rounds\n",
|
|
"[20]\ttrain's l2: 0.990559\tvalid's l2: 0.994332\n",
|
|
"[40]\ttrain's l2: 0.98687\tvalid's l2: 0.993702\n",
|
|
"[60]\ttrain's l2: 0.984308\tvalid's l2: 0.993503\n",
|
|
"[80]\ttrain's l2: 0.982202\tvalid's l2: 0.993446\n",
|
|
"[100]\ttrain's l2: 0.980318\tvalid's l2: 0.993423\n",
|
|
"[120]\ttrain's l2: 0.97854\tvalid's l2: 0.993409\n",
|
|
"[140]\ttrain's l2: 0.97679\tvalid's l2: 0.993413\n",
|
|
"[160]\ttrain's l2: 0.975116\tvalid's l2: 0.993473\n",
|
|
"Early stopping, best iteration is:\n",
|
|
"[127]\ttrain's l2: 0.977957\tvalid's l2: 0.993381\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"###################################\n",
|
|
"# train model\n",
|
|
"###################################\n",
|
|
"data_handler_config = {\n",
|
|
" \"start_time\": \"2008-01-01\",\n",
|
|
" \"end_time\": \"2020-08-01\",\n",
|
|
" \"fit_start_time\": \"2008-01-01\",\n",
|
|
" \"fit_end_time\": \"2014-12-31\",\n",
|
|
" \"instruments\": market,\n",
|
|
"}\n",
|
|
"\n",
|
|
"task = {\n",
|
|
" \"model\": {\n",
|
|
" \"class\": \"LGBModel\",\n",
|
|
" \"module_path\": \"qlib.contrib.model.gbdt\",\n",
|
|
" \"kwargs\": {\n",
|
|
" \"loss\": \"mse\",\n",
|
|
" \"colsample_bytree\": 0.8879,\n",
|
|
" \"learning_rate\": 0.0421,\n",
|
|
" \"subsample\": 0.8789,\n",
|
|
" \"lambda_l1\": 205.6999,\n",
|
|
" \"lambda_l2\": 580.9768,\n",
|
|
" \"max_depth\": 8,\n",
|
|
" \"num_leaves\": 210,\n",
|
|
" \"num_threads\": 20,\n",
|
|
" },\n",
|
|
" },\n",
|
|
" \"dataset\": {\n",
|
|
" \"class\": \"DatasetH\",\n",
|
|
" \"module_path\": \"qlib.data.dataset\",\n",
|
|
" \"kwargs\": {\n",
|
|
" \"handler\": {\n",
|
|
" \"class\": \"Alpha158\",\n",
|
|
" \"module_path\": \"qlib.contrib.data.handler\",\n",
|
|
" \"kwargs\": data_handler_config,\n",
|
|
" },\n",
|
|
" \"segments\": {\n",
|
|
" \"train\": (\"2008-01-01\", \"2014-12-31\"),\n",
|
|
" \"valid\": (\"2015-01-01\", \"2016-12-31\"),\n",
|
|
" \"test\": (\"2017-01-01\", \"2017-12-31\"), # NOTE: use a shorter time range\n",
|
|
" },\n",
|
|
" },\n",
|
|
" },\n",
|
|
"}\n",
|
|
"\n",
|
|
"# model initialization\n",
|
|
"model = init_instance_by_config(task[\"model\"])\n",
|
|
"dataset = init_instance_by_config(task[\"dataset\"])\n",
|
|
"\n",
|
|
"# start exp to train model\n",
|
|
"with R.start(experiment_name=\"train_model\"):\n",
|
|
" R.log_params(**flatten_dict(task))\n",
|
|
" model.fit(dataset)\n",
|
|
" R.save_objects(trained_model=model)\n",
|
|
" rid = R.get_recorder().id\n"
|
|
]
|
|
},
|
|
{
|
|
"source": [
|
|
"## Optimization Based Strategy"
|
|
],
|
|
"cell_type": "markdown",
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from qlib.contrib.strategy.strategy import BaseStrategy\n",
|
|
"\n",
|
|
"\n",
|
|
"class OptBasedStrategy(BaseStrategy):\n",
|
|
" \"\"\"Optimization Based Strategy\"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, data_handler, cov_estimator, optimizer):\n",
|
|
" self.data_handler = data_handler\n",
|
|
" self.cov_estimator = cov_estimator\n",
|
|
" self.optimizer = optimizer\n",
|
|
"\n",
|
|
" def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):\n",
|
|
" \"\"\"\n",
|
|
" Parameters\n",
|
|
" -----------\n",
|
|
" score_series : pd.Series\n",
|
|
" stock_id , score.\n",
|
|
" current : Position()\n",
|
|
" current position of account.\n",
|
|
" trade_exchange : Exchange()\n",
|
|
" exchange.\n",
|
|
" trade_date : pd.Timestamp\n",
|
|
" date.\n",
|
|
" \"\"\"\n",
|
|
" score_series = score_series.dropna()\n",
|
|
"\n",
|
|
" # check stock holdings, if\n",
|
|
" # 1. doesn't have score: target amount = 0 (force sell)\n",
|
|
" # 2. stock not tradable: target amount = current amount\n",
|
|
" current_position = current.get_stock_amount_dict()\n",
|
|
" target_position = {}\n",
|
|
" for stock_id in current_position:\n",
|
|
" if not trade_exchange.is_stock_tradable(stock_id=stock_id, trade_date=trade_date):\n",
|
|
" target_position[stock_id] = current_position[stock_id]\n",
|
|
" elif stock_id not in score_series.index:\n",
|
|
" target_position[stock_id] = 0\n",
|
|
" else:\n",
|
|
" # need to be solved by optimizer\n",
|
|
" pass\n",
|
|
"\n",
|
|
" # filter scores, if\n",
|
|
" # 1. kept in `target_position` by previous rules\n",
|
|
" # 2. not tradable\n",
|
|
" skipped = []\n",
|
|
" for stock_id in score_series.index:\n",
|
|
" if stock_id in target_position:\n",
|
|
" skipped.append(stock_id)\n",
|
|
" elif not trade_exchange.is_stock_tradable(stock_id=stock_id, trade_date=trade_date):\n",
|
|
" skipped.append(stock_id)\n",
|
|
" score_series = score_series[~score_series.index.isin(skipped)]\n",
|
|
"\n",
|
|
" # calc remaining value\n",
|
|
" current_value = pd.Series({\n",
|
|
" stock_id: current.get_stock_price(stock_id) * amount\n",
|
|
" for stock_id, amount in current_position.items()\n",
|
|
" })\n",
|
|
" risk_total_value = self.get_risk_degree(trade_date) * current.calculate_value()\n",
|
|
" traded_value = risk_total_value - current_value.loc[list(target_position)].sum()\n",
|
|
"\n",
|
|
" # portfolio init weight\n",
|
|
" init_weight = current_value.reindex(score_series.index, fill_value=0)\n",
|
|
" init_weight_sum = init_weight.sum()\n",
|
|
" if init_weight_sum > 0:\n",
|
|
" init_weight /= init_weight_sum\n",
|
|
"\n",
|
|
" # covariance estimation\n",
|
|
" selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index)\n",
|
|
" price = self.data_handler.fetch(selector, level=None, squeeze=True)\n",
|
|
" cov = self.cov_estimator(price)\n",
|
|
" cov = cov.reindex(\n",
|
|
" index=score_series.index, \n",
|
|
" columns=score_series.index, \n",
|
|
" #fill_value=cov.max().max()\n",
|
|
" )\n",
|
|
"\n",
|
|
" # optimize target portfolio\n",
|
|
" try:\n",
|
|
" if init_weight.sum() > 0:\n",
|
|
" target_weight = self.optimizer(cov, score_series, init_weight)\n",
|
|
" else:\n",
|
|
" target_weight = self.optimizer(cov, score_series)\n",
|
|
" target_weight = target_weight[target_weight > 1e-6]\n",
|
|
" for stock_id, weight in target_weight.items():\n",
|
|
" target_position[stock_id] = int(traded_value * weight / trade_exchange.get_close(stock_id, pred_date))\n",
|
|
" except Exception as e:\n",
|
|
" print('Unknown exception:', trade_date, e)\n",
|
|
" for stock_id in score_series.index:\n",
|
|
" if stock_id in current_position:\n",
|
|
" target_position[stock_id] = current_position[stock_id]\n",
|
|
"\n",
|
|
" # generate order list\n",
|
|
" order_list = trade_exchange.generate_order_for_target_amount_position(\n",
|
|
" target_position=target_position,\n",
|
|
" current_position=current_position,\n",
|
|
" trade_date=trade_date,\n",
|
|
" )\n",
|
|
"\n",
|
|
" return order_list"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from qlib.data.dataset.loader import QlibDataLoader\n",
|
|
"from qlib.data.dataset.handler import DataHandler\n",
|
|
"from qlib.model.riskmodel import ShrinkCovEstimator\n",
|
|
"from qlib.portfolio.optimizer import PortfolioOptimizer"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"[36502:MainThread](2020-11-27 16:27:43,722) INFO - qlib.timer - [log.py:81] - Time cost: 6.369s | Loading data Done\n",
|
|
"[36502:MainThread](2020-11-27 16:27:43,724) INFO - qlib.timer - [log.py:81] - Time cost: 6.371s | Init data Done\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"data_loader = QlibDataLoader([\"$close\"])\n",
|
|
"data_handler = DataHandler(\"all\", \"2015-01-01\", \"2020-08-01\", data_loader)\n",
|
|
"cov_estimator = ShrinkCovEstimator(nan_option=\"mask\")\n",
|
|
"optimizer = PortfolioOptimizer(\"mvo\", lamb=2, delta=0.2, tol=1e-5)\n",
|
|
"strategy = OptBasedStrategy(data_handler, cov_estimator, optimizer)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"[36502:MainThread](2020-11-27 16:27:43,761) INFO - qlib.workflow - [exp.py:180] - Experiment 3 starts running ...\n",
|
|
"[36502:MainThread](2020-11-27 16:27:43,779) INFO - qlib.workflow - [recorder.py:234] - Recorder 67d105113f424259889fc0b6b0b94973 starts running under Experiment 3 ...\n",
|
|
"[36502:MainThread](2020-11-27 16:27:43,780) INFO - qlib.workflow - [expm.py:251] - No tracking URI is provided. The default tracking URI is set as `mlruns` under the working directory.\n",
|
|
"[36502:MainThread](2020-11-27 16:27:43,991) INFO - qlib.workflow - [record_temp.py:127] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 3\n",
|
|
"[36502:MainThread](2020-11-27 16:27:44,050) INFO - qlib.Evaluate - [evaluate.py:161] - Create new exchange\n",
|
|
"'The following are prediction results of the LGBModel model.'\n",
|
|
" score\n",
|
|
"datetime instrument \n",
|
|
"2017-01-03 SH600000 -0.053414\n",
|
|
" SH600008 0.001820\n",
|
|
" SH600009 0.023472\n",
|
|
" SH600010 -0.005625\n",
|
|
" SH600015 -0.137476\n",
|
|
"/home/dongzho/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:55: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n",
|
|
"/home/dongzho/qlib/qlib/portfolio/optimizer.py:256: UserWarning: optimization not success (9)\n",
|
|
" warnings.warn(f\"optimization not success ({sol.status})\")\n",
|
|
"Unknown exception: 2017-01-16 00:00:00 ('SZ300104', Timestamp('2017-01-13 00:00:00'))\n",
|
|
"Unknown exception: 2017-01-23 00:00:00 ('SZ000671', Timestamp('2017-01-20 00:00:00'))\n",
|
|
"Unknown exception: 2017-03-03 00:00:00 ('SZ002465', Timestamp('2017-03-02 00:00:00'))\n",
|
|
"Unknown exception: 2017-03-07 00:00:00 ('SH601127', Timestamp('2017-03-06 00:00:00'))\n",
|
|
"/home/dongzho/qlib/qlib/portfolio/optimizer.py:256: UserWarning: optimization not success (4)\n",
|
|
" warnings.warn(f\"optimization not success ({sol.status})\")\n",
|
|
"Unknown exception: 2017-05-08 00:00:00 ('SH601727', Timestamp('2017-05-05 00:00:00'))\n",
|
|
"Unknown exception: 2017-06-20 00:00:00 ('SH600036', Timestamp('2017-06-19 00:00:00'))\n",
|
|
"Unknown exception: 2017-06-21 00:00:00 ('SH600739', Timestamp('2017-06-20 00:00:00'))\n",
|
|
"Unknown exception: 2017-06-29 00:00:00 ('SZ300168', Timestamp('2017-06-28 00:00:00'))\n",
|
|
"Unknown exception: 2017-09-01 00:00:00 ('SH601088', Timestamp('2017-08-31 00:00:00'))\n",
|
|
"Unknown exception: 2017-09-12 00:00:00 ('SH601872', Timestamp('2017-09-11 00:00:00'))\n",
|
|
"Unknown exception: 2017-09-21 00:00:00 ('SH600100', Timestamp('2017-09-20 00:00:00'))\n",
|
|
"Unknown exception: 2017-09-22 00:00:00 ('SH600021', Timestamp('2017-09-21 00:00:00'))\n",
|
|
"Unknown exception: 2017-10-11 00:00:00 ('SH600959', Timestamp('2017-10-10 00:00:00'))\n",
|
|
"Unknown exception: 2017-10-25 00:00:00 ('SZ000792', Timestamp('2017-10-24 00:00:00'))\n",
|
|
"Unknown exception: 2017-12-26 00:00:00 ('SH600682', Timestamp('2017-12-25 00:00:00'))\n",
|
|
"[36502:MainThread](2020-11-27 17:28:14,269) INFO - qlib.workflow - [record_temp.py:249] - Portfolio analysis record 'port_analysis.pkl' has been saved as the artifact of the Experiment 3\n",
|
|
"'The following are analysis results of the excess return without cost.'\n",
|
|
" risk\n",
|
|
"mean 0.001247\n",
|
|
"std 0.005437\n",
|
|
"annualized_return 0.314237\n",
|
|
"information_ratio 3.640637\n",
|
|
"max_drawdown -0.033416\n",
|
|
"'The following are analysis results of the excess return with cost.'\n",
|
|
" risk\n",
|
|
"mean 0.001028\n",
|
|
"std 0.005432\n",
|
|
"annualized_return 0.259041\n",
|
|
"information_ratio 3.003970\n",
|
|
"max_drawdown -0.041455\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"###################################\n",
|
|
"# prediction, backtest & analysis\n",
|
|
"###################################\n",
|
|
"port_analysis_config = {\n",
|
|
" \"strategy\": strategy,\n",
|
|
" \"backtest\": {\n",
|
|
" \"verbose\": False,\n",
|
|
" \"limit_threshold\": 0.095,\n",
|
|
" \"account\": 100000000,\n",
|
|
" \"benchmark\": benchmark,\n",
|
|
" \"deal_price\": \"close\",\n",
|
|
" \"open_cost\": 0.0005,\n",
|
|
" \"close_cost\": 0.0015,\n",
|
|
" \"min_cost\": 5,\n",
|
|
" },\n",
|
|
"}\n",
|
|
"\n",
|
|
"\n",
|
|
"# backtest and analysis\n",
|
|
"with R.start(experiment_name=\"backtest_analysis\"):\n",
|
|
" recorder = R.get_recorder(rid, experiment_name=\"train_model\")\n",
|
|
" model = recorder.load_object(\"trained_model\")\n",
|
|
"\n",
|
|
" # prediction\n",
|
|
" recorder = R.get_recorder()\n",
|
|
" ba_rid = recorder.id\n",
|
|
" sr = SignalRecord(model, dataset, recorder)\n",
|
|
" sr.generate()\n",
|
|
"\n",
|
|
" # backtest & analysis\n",
|
|
" par = PortAnaRecord(recorder, port_analysis_config)\n",
|
|
" par.generate()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
]
|
|
} |