mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Add ipynb format check (#1439)
* Update test_qlib_from_source.yml
* add ipynb format check to workflow
* test ipynb CI
* modify nbqa check path
* add pylint flake8 mypy check to ipynb
* check ipynb with black and pylint
* reformat .ipynb files
* format line length nbqa black . -l 120
* update nbqa .ipynb format CI
* format old ipynb files
* add nbconvert check to CI
* adjust CI order to avoid repeating download data
This commit is contained in:
@@ -88,6 +88,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from qlib.tests.data import GetData\n",
|
||||
"\n",
|
||||
"GetData().qlib_data(exists_skip=True)"
|
||||
]
|
||||
},
|
||||
@@ -99,6 +100,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import qlib\n",
|
||||
"\n",
|
||||
"qlib.init()"
|
||||
]
|
||||
},
|
||||
@@ -134,7 +136,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from qlib.data import D\n",
|
||||
"D.calendar(start_time='2010-01-01', end_time='2017-12-31', freq='day')[:2] # calendar data"
|
||||
"\n",
|
||||
"print(D.calendar(start_time=\"2010-01-01\", end_time=\"2017-12-31\", freq=\"day\")[:2]) # calendar data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -152,7 +155,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = D.features(['SH601216'], ['$open', '$high', '$low', '$close', '$factor'], start_time='2020-05-01', end_time='2020-05-31') "
|
||||
"df = D.features(\n",
|
||||
" [\"SH601216\"],\n",
|
||||
" [\"$open\", \"$high\", \"$low\", \"$close\", \"$factor\"],\n",
|
||||
" start_time=\"2020-05-01\",\n",
|
||||
" end_time=\"2020-05-31\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -163,11 +171,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import plotly.graph_objects as go\n",
|
||||
"fig = go.Figure(data=[go.Candlestick(x=df.index.get_level_values(\"datetime\"),\n",
|
||||
" open=df['$open'],\n",
|
||||
" high=df['$high'],\n",
|
||||
" low=df['$low'],\n",
|
||||
" close=df['$close'])])\n",
|
||||
"\n",
|
||||
"fig = go.Figure(\n",
|
||||
" data=[\n",
|
||||
" go.Candlestick(\n",
|
||||
" x=df.index.get_level_values(\"datetime\"),\n",
|
||||
" open=df[\"$open\"],\n",
|
||||
" high=df[\"$high\"],\n",
|
||||
" low=df[\"$low\"],\n",
|
||||
" close=df[\"$close\"],\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
@@ -197,11 +212,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import plotly.graph_objects as go\n",
|
||||
"fig = go.Figure(data=[go.Candlestick(x=df.index.get_level_values(\"datetime\"),\n",
|
||||
" open=df['$open'] / df['$factor'],\n",
|
||||
" high=df['$high'] / df['$factor'],\n",
|
||||
" low=df['$low'] / df['$factor'],\n",
|
||||
" close=df['$close'] / df['$factor'])])\n",
|
||||
"\n",
|
||||
"fig = go.Figure(\n",
|
||||
" data=[\n",
|
||||
" go.Candlestick(\n",
|
||||
" x=df.index.get_level_values(\"datetime\"),\n",
|
||||
" open=df[\"$open\"] / df[\"$factor\"],\n",
|
||||
" high=df[\"$high\"] / df[\"$factor\"],\n",
|
||||
" low=df[\"$low\"] / df[\"$factor\"],\n",
|
||||
" close=df[\"$close\"] / df[\"$factor\"],\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
@@ -240,7 +262,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# dynamic universe\n",
|
||||
"universe = D.list_instruments(D.instruments('csi100'), start_time='2010-01-01', end_time='2020-12-31')\n",
|
||||
"universe = D.list_instruments(D.instruments(\"csi100\"), start_time=\"2010-01-01\", end_time=\"2020-12-31\")\n",
|
||||
"pprint(universe)"
|
||||
]
|
||||
},
|
||||
@@ -271,8 +293,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = D.features(D.instruments('csi100'), ['$close'], start_time='2010-01-01', end_time='2020-12-31') \n",
|
||||
"df.groupby('datetime').size().plot()"
|
||||
"df = D.features(D.instruments(\"csi100\"), [\"$close\"], start_time=\"2010-01-01\", end_time=\"2020-12-31\")\n",
|
||||
"df.groupby(\"datetime\").size().plot()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -313,8 +335,7 @@
|
||||
" !cd ../../scripts/data_collector/pit/ && pip install -r requirements.txt\n",
|
||||
" !cd ../../scripts/data_collector/pit/ && python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex \"^(600519|000725).*\"\n",
|
||||
" !cd ../../scripts/data_collector/pit/ && python collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized\n",
|
||||
" !cd ../../scripts/ && python dump_pit.py dump --csv_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly\n",
|
||||
" pass"
|
||||
" !cd ../../scripts/ && python dump_pit.py dump --csv_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -338,7 +359,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instruments = [\"sh600519\"]\n",
|
||||
"data = D.features(instruments, ['P($$roewa_q)'], start_time=\"2019-01-01\", end_time=\"2019-07-19\", freq=\"day\")"
|
||||
"data = D.features(\n",
|
||||
" instruments,\n",
|
||||
" [\"P($$roewa_q)\"],\n",
|
||||
" start_time=\"2019-01-01\",\n",
|
||||
" end_time=\"2019-07-19\",\n",
|
||||
" freq=\"day\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -366,7 +393,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"D.features([\"sh600519\"], ['(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'])"
|
||||
"D.features(\n",
|
||||
" [\"sh600519\"],\n",
|
||||
" [\"(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -418,7 +448,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qdl = QlibDataLoader(config=(['$close / Ref($close, 10)'], ['RET10']))"
|
||||
"qdl = QlibDataLoader(config=([\"$close / Ref($close, 10)\"], [\"RET10\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -428,7 +458,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qdl.load(instruments=['sh600519'], start_time='20190101', end_time='20191231')"
|
||||
"qdl.load(instruments=[\"sh600519\"], start_time=\"20190101\", end_time=\"20191231\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -456,7 +486,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = qdl.load(instruments=['sh600519'], start_time='20190101', end_time='20191231')"
|
||||
"df = qdl.load(instruments=[\"sh600519\"], start_time=\"20190101\", end_time=\"20191231\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -476,7 +506,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.plot(kind='hist')"
|
||||
"df.plot(kind=\"hist\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -508,9 +538,16 @@
|
||||
"source": [
|
||||
"# NOTE: normally, the training & validation time range will be `fit_start_time` , `fit_end_time`\n",
|
||||
"# however,all the components are decomposed, so the training & validation time range is unknown when preprocessing.\n",
|
||||
"dh = DataHandlerLP(instruments=['sh600519'], start_time='20170101', end_time='20191231',\n",
|
||||
" infer_processors=[ZScoreNorm(fit_start_time='20170101', fit_end_time='20181231'), Fillna()],\n",
|
||||
" data_loader=qdl)"
|
||||
"dh = DataHandlerLP(\n",
|
||||
" instruments=[\"sh600519\"],\n",
|
||||
" start_time=\"20170101\",\n",
|
||||
" end_time=\"20191231\",\n",
|
||||
" infer_processors=[\n",
|
||||
" ZScoreNorm(fit_start_time=\"20170101\", fit_end_time=\"20181231\"),\n",
|
||||
" Fillna(),\n",
|
||||
" ],\n",
|
||||
" data_loader=qdl,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -550,7 +587,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.plot(kind='hist')"
|
||||
"df.plot(kind=\"hist\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -586,7 +623,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds = DatasetH(dh, segments={\"train\": ('20180101', '20181231'), \"valid\": ('20190101', '20191231')})"
|
||||
"ds = DatasetH(dh, segments={\"train\": (\"20180101\", \"20181231\"), \"valid\": (\"20190101\", \"20191231\")})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -596,7 +633,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds.prepare('train')"
|
||||
"ds.prepare(\"train\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -606,7 +643,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds.prepare('valid')"
|
||||
"ds.prepare(\"valid\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -628,8 +665,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds = TSDatasetH(step_len=10, handler=dh, segments={\"train\": ('20180101', '20181231'), \"valid\": ('20190101', '20191231')})\n",
|
||||
"train_sampler = ds.prepare('train')"
|
||||
"ds = TSDatasetH(\n",
|
||||
" step_len=10,\n",
|
||||
" handler=dh,\n",
|
||||
" segments={\"train\": (\"20180101\", \"20181231\"), \"valid\": (\"20190101\", \"20191231\")},\n",
|
||||
")\n",
|
||||
"train_sampler = ds.prepare(\"train\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -649,7 +690,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_sampler[0] # Retrieving the first example"
|
||||
"train_sampler[0] # Retrieving the first example"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -659,7 +700,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_sampler['2018-01-08', 'sh600519'] # get the time series by <'timestamp', 'instrument_id'> index"
|
||||
"train_sampler[\"2018-01-08\", \"sh600519\"] # get the time series by <'timestamp', 'instrument_id'> index"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -682,11 +723,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"handler_kwargs = {\n",
|
||||
" \"start_time\": \"2008-01-01\",\n",
|
||||
" \"end_time\": \"2020-08-01\",\n",
|
||||
" \"fit_start_time\": \"2008-01-01\",\n",
|
||||
" \"fit_end_time\": \"2014-12-31\",\n",
|
||||
" \"instruments\": MARKET,\n",
|
||||
" \"start_time\": \"2008-01-01\",\n",
|
||||
" \"end_time\": \"2020-08-01\",\n",
|
||||
" \"fit_start_time\": \"2008-01-01\",\n",
|
||||
" \"fit_end_time\": \"2014-12-31\",\n",
|
||||
" \"instruments\": MARKET,\n",
|
||||
"}\n",
|
||||
"handler_conf = {\n",
|
||||
" \"class\": \"Alpha158\",\n",
|
||||
@@ -735,6 +776,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from qlib.contrib.data.handler import Alpha158\n",
|
||||
"\n",
|
||||
"hd = Alpha158(**handler_kwargs)"
|
||||
]
|
||||
},
|
||||
@@ -826,7 +868,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hd.process_type # appending type"
|
||||
"hd.process_type # appending type"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -857,16 +899,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_conf = {\n",
|
||||
" \"class\": \"DatasetH\",\n",
|
||||
" \"module_path\": \"qlib.data.dataset\",\n",
|
||||
" \"kwargs\": {\n",
|
||||
" \"handler\": hd,\n",
|
||||
" \"segments\": {\n",
|
||||
" \"train\": (\"2008-01-01\", \"2014-12-31\"),\n",
|
||||
" \"valid\": (\"2015-01-01\", \"2016-12-31\"),\n",
|
||||
" \"test\": (\"2017-01-01\", \"2020-08-01\"),\n",
|
||||
" },\n",
|
||||
" \"class\": \"DatasetH\",\n",
|
||||
" \"module_path\": \"qlib.data.dataset\",\n",
|
||||
" \"kwargs\": {\n",
|
||||
" \"handler\": hd,\n",
|
||||
" \"segments\": {\n",
|
||||
" \"train\": (\"2008-01-01\", \"2014-12-31\"),\n",
|
||||
" \"valid\": (\"2015-01-01\", \"2016-12-31\"),\n",
|
||||
" \"test\": (\"2017-01-01\", \"2020-08-01\"),\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
@@ -908,7 +950,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = init_instance_by_config({\n",
|
||||
"model = init_instance_by_config(\n",
|
||||
" {\n",
|
||||
" \"class\": \"LGBModel\",\n",
|
||||
" \"module_path\": \"qlib.contrib.model.gbdt\",\n",
|
||||
" \"kwargs\": {\n",
|
||||
@@ -922,7 +965,8 @@
|
||||
" \"num_leaves\": 210,\n",
|
||||
" \"num_threads\": 20,\n",
|
||||
" },\n",
|
||||
"})"
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -938,7 +982,7 @@
|
||||
" R.save_objects(trained_model=model)\n",
|
||||
"\n",
|
||||
" rec = R.get_recorder()\n",
|
||||
" rid = rec.id # save the record id\n",
|
||||
" rid = rec.id # save the record id\n",
|
||||
"\n",
|
||||
" # Inference and saving signal\n",
|
||||
" sr = SignalRecord(model, dataset, rec)\n",
|
||||
@@ -1001,12 +1045,11 @@
|
||||
"\n",
|
||||
"# backtest and analysis\n",
|
||||
"with R.start(experiment_name=EXP_NAME, recorder_id=rid, resume=True):\n",
|
||||
"\n",
|
||||
" # signal-based analysis\n",
|
||||
" rec = R.get_recorder()\n",
|
||||
" sar = SigAnaRecord(rec)\n",
|
||||
" sar.generate()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # portfolio-based analysis: backtest\n",
|
||||
" par = PortAnaRecord(rec, port_analysis_config, \"day\")\n",
|
||||
" par.generate()"
|
||||
@@ -1137,7 +1180,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"label_df = dataset.prepare(\"test\", col_set=\"label\")\n",
|
||||
"label_df.columns = ['label']"
|
||||
"label_df.columns = [\"label\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user