1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-09 07:21:26 +08:00

Compare commits

..

3 Commits

Author SHA1 Message Date
Young
77ba7b4e91 Add analyser example and finetune example 2021-06-06 07:51:52 +00:00
Young
7a639eeea7 add IC and rank IC 2021-06-06 07:43:26 +00:00
Young
cddaf90ef5 monitor initial version 2021-06-06 07:43:26 +00:00
257 changed files with 4645 additions and 20460 deletions

View File

@@ -12,9 +12,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-latest, macos-11]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
os: [windows-latest, macos-latest]
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v2
@@ -45,8 +44,7 @@ jobs:
- name: Build wheel on Linux
uses: RalfG/python-wheels-manylinux-build@v0.3.1-manylinux2010_x86_64
with:
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-versions: 'cp37-cp37m cp38-cp38'
python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38'
build-requirements: 'numpy cython'
- name: Set up Python
uses: actions/setup-python@v2

View File

@@ -1,4 +1,4 @@
name: Test
name: Test
on:
push:
@@ -12,9 +12,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, ubuntu-18.04, ubuntu-20.04]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
@@ -26,41 +25,96 @@ jobs:
- name: Lint with Black
run: |
pip install --upgrade pip
pip install black wheel
black qlib -l 120 --check --diff
cd ..
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install black
$CONDA\\python.exe -m black qlib -l 120 --check --diff
else
sudo $CONDA/bin/python -m pip install black
$CONDA/bin/python -m black qlib -l 120 --check --diff
fi
shell: bash
# Test Qlib installed with pip
- name: Install Qlib with pip
run: |
pip install numpy==1.19.5 ruamel.yaml
pip install pyqlib --ignore-installed
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install numpy==1.19.5
$CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml numpy --user
else
sudo $CONDA/bin/python -m pip install numpy==1.19.5
sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml numpy
fi
shell: bash
- name: Install Lightgbm for MacOS
if: runner.os == 'macOS'
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
- name: Test data downloads
run: |
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
else
$CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
fi
shell: bash
- name: Test workflow by config (install from pip)
run: |
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
python -m pip uninstall -y pyqlib
# Test Qlib installed from source
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml
$CONDA\\python.exe -m pip uninstall -y pyqlib
else
$CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
sudo $CONDA/bin/python -m pip uninstall -y pyqlib
fi
shell: bash
# Test Qlib installed from source
- name: Install Qlib from source
run: |
pip install --upgrade cython jupyter jupyter_contrib_nbextensions numpy scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't
pip install -e .
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install --upgrade cython
$CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions
$CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't
$CONDA\\python.exe setup.py install
else
sudo $CONDA/bin/python -m pip install --upgrade cython
sudo $CONDA/bin/python -m pip install numpy jupyter jupyter_contrib_nbextensions
sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't
sudo $CONDA/bin/python setup.py install
fi
shell: bash
- name: Install test dependencies
run: |
pip install --upgrade pip
pip install black pytest
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pip install --upgrade pip
$CONDA\\python.exe -m pip install black pytest
else
sudo $CONDA/bin/python -m pip install --upgrade pip
sudo $CONDA/bin/python -m pip install black pytest
fi
shell: bash
- name: Unit tests with Pytest
run: |
cd tests
python -m pytest . --durations=10
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe -m pytest . --durations=0
else
$CONDA/bin/python -m pytest . --durations=0
fi
shell: bash
- name: Test workflow by config (install from source)
run: |
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
if [ "$RUNNER_OS" == "Windows" ]; then
$CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml
else
$CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
fi
shell: bash

View File

@@ -1,75 +0,0 @@
# There are some issues (in the downloading data phase) on MacOS when running with other tests. So we split it into an individual config.
name: Test MacOS
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-11, macos-latest]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Lint with Black
run: |
cd ..
python -m pip install pip --upgrade
python -m pip install wheel --upgrade
python -m pip install black
python -m black qlib -l 120 --check --diff
# Test Qlib installed with pip
- name: Install Qlib with pip
run: |
python -m pip install numpy==1.19.5
python -m pip install pyqlib --ignore-installed ruamel.yaml numpy
- name: Install Lightgbm for MacOS
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
# FIX MacOS error: Segmentation fault
# reference: https://github.com/microsoft/LightGBM/issues/4229
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
brew unlink libomp
brew install libomp.rb
- name: Test data downloads
run: |
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
- name: Test workflow by config (install from pip)
run: |
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
python -m pip uninstall -y pyqlib
# Test Qlib installed from source
- name: Install Qlib from source
run: |
python -m pip install --upgrade cython
python -m pip install numpy jupyter jupyter_contrib_nbextensions
python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while instsalling locally won't
python setup.py install
- name: Install test dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -U pyopenssl idna
python -m pip install black pytest
- name: Unit tests with Pytest
run: |
cd tests
python -m pytest . --durations=0
- name: Test workflow by config (install from source)
run: |
python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

1
.gitignore vendored
View File

@@ -20,7 +20,6 @@ dist/
.nvimrc
.vscode
qlib/VERSION.txt
qlib/data/_libs/expanding.cpp
qlib/data/_libs/rolling.cpp
examples/estimator/estimator_example/

View File

@@ -159,21 +159,6 @@ Version 0.5.0
- Add baselines
- public data crawler
Version 0.8.0
--------------------
- The backtest is greatly refactored.
- Nested decision execution framework is supported
- There are lots of changes for daily trading, it is hard to list all of them. But a few important changes could be noticed
- The trading limitation is more accurate;
- In `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/backtest/exchange.py#L160>`_, longing and shorting actions share the same action.
- In `current verison <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/backtest/exchange.py#L304>`_, the trading limitation is different between loging and shorting action.
- The constant is different when calculating annualized metrics.
- `Current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/contrib/evaluate.py#L42>`_ uses more accurate constant than `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/evaluate.py#L22>`_
- `A new version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/tests/data.py#L17>`_ of data is released. Due to the unstability of Yahoo data source, the data may be different after downloading data again.
- Users could chec kout the backtesting results between `Current version <https://github.com/microsoft/qlib/tree/7c31012b507a3823117bddcc693fc64899460b2a/examples/benchmarks>`_ and `previous version <https://github.com/microsoft/qlib/tree/v0.7.2/examples/benchmarks>`_
Other Versions
Version greater than Version 0.5.0
----------------------------------
Please refer to `Github release Notes <https://github.com/microsoft/qlib/releases>`_

View File

@@ -1 +0,0 @@
include qlib/VERSION.txt

View File

@@ -11,10 +11,6 @@
Recent released features
| Feature | Status |
| -- | ------ |
|Temporal Routing Adaptor (TRA) | [Released](https://github.com/microsoft/qlib/pull/531) on July 30, 2021 |
| Transformer & Localformer | [Released](https://github.com/microsoft/qlib/pull/508) on July 22, 2021 |
| Release Qlib v0.7.0 | [Released](https://github.com/microsoft/qlib/releases/tag/v0.7.0) on July 12, 2021 |
| TCTS Model | [Released](https://github.com/microsoft/qlib/pull/491) on July 1, 2021 |
| Online serving and automatic model rolling | :star: [Released](https://github.com/microsoft/qlib/pull/290) on May 17, 2021 |
| DoubleEnsemble Model | [Released](https://github.com/microsoft/qlib/pull/286) on Mar 2, 2021 |
| High-frequency data processing example | [Released](https://github.com/microsoft/qlib/pull/257) on Feb 5, 2021 |
@@ -24,8 +20,10 @@ Recent released features
Features released before 2021 are not listed here.
<p align="center">
<img src="http://fintech.msra.cn/images_v070/logo/1.png" />
<img src="http://fintech.msra.cn/images_v060/logo/1.png" />
</p>
@@ -44,7 +42,7 @@ For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative
- [Data Preparation](#data-preparation)
- [Auto Quant Research Workflow](#auto-quant-research-workflow)
- [Building Customized Quant Research Workflow by Code](#building-customized-quant-research-workflow-by-code)
- [**Quant Model(Paper) Zoo**](#quant-model-paper-zoo)
- [**Quant Model Zoo**](#quant-model-zoo)
- [Run a single model](#run-a-single-model)
- [Run multiple models](#run-multiple-models)
- [**Quant Dataset Zoo**](#quant-dataset-zoo)
@@ -70,7 +68,7 @@ Your feedbacks about the features are very important.
# Framework of Qlib
<div style="align: center">
<img src="docs/_static/img/framework.svg" />
<img src="http://fintech.msra.cn/images_v060/framework.png?v=0.1" />
</div>
@@ -100,14 +98,14 @@ Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** shows how
This table demonstrates the supported Python version of `Qlib`:
| | install with pip | install from source | plot |
| ------------- |:---------------------:|:--------------------:|:----:|
| Python 3.6 | :heavy_check_mark: | :heavy_check_mark: (only with `Anaconda`) | :heavy_check_mark: |
| Python 3.7 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.8 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| Python 3.9 | :x: | :heavy_check_mark: | :x: |
**Note**:
1. **Conda** is suggested for managing your Python environment.
1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
1. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
2. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
### Install with pip
Users can easily install ``Qlib`` by pip according to the following command.
@@ -161,28 +159,6 @@ Users could create the same dataset with it.
*Please pay **ATTENTION** that the data is collected from [Yahoo Finance](https://finance.yahoo.com/lookup), and the data might not be perfect.
We recommend users to prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)*.
### Automatic update of daily frequency data (from yahoo finance)
> It is recommended that users update the data manually once (--trading_date 2021-05-25) and then set it to update automatically.
> For more information refer to: [yahoo collector](https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#automatic-update-of-daily-frequency-datafrom-yahoo-finance)
* Automatic update of data to the "qlib" directory each trading day(Linux)
* use *crontab*: `crontab -e`
* set up timed tasks:
```
* * * * 1-5 python <script path> update_data_to_bin --qlib_data_1d_dir <user data dir>
```
* **script path**: *scripts/data_collector/yahoo/collector.py*
* Manual update of data
```
python scripts/data_collector/yahoo/collector.py update_data_to_bin --qlib_data_1d_dir <user data dir> --trading_date <start date> --end_date <end date>
```
* *trading_date*: start of trading day
* *end_date*: end of trading day(not included)
<!--
- Run the initialization code and get stock data:
@@ -246,19 +222,19 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
2. Graphical Reports Analysis: Run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports
- Forecasting signal (model prediction) analysis
- Cumulative Return of groups
![Cumulative Return](http://fintech.msra.cn/images_v070/analysis/analysis_model_cumulative_return.png?v=0.1)
![Cumulative Return](http://fintech.msra.cn/images_v060/analysis/analysis_model_cumulative_return.png?v=0.1)
- Return distribution
![long_short](http://fintech.msra.cn/images_v070/analysis/analysis_model_long_short.png?v=0.1)
![long_short](http://fintech.msra.cn/images_v060/analysis/analysis_model_long_short.png?v=0.1)
- Information Coefficient (IC)
![Information Coefficient](http://fintech.msra.cn/images_v070/analysis/analysis_model_IC.png?v=0.1)
![Monthly IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_monthly_IC.png?v=0.1)
![IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_NDQ.png?v=0.1)
![Information Coefficient](http://fintech.msra.cn/images_v060/analysis/analysis_model_IC.png?v=0.1)
![Monthly IC](http://fintech.msra.cn/images_v060/analysis/analysis_model_monthly_IC.png?v=0.1)
![IC](http://fintech.msra.cn/images_v060/analysis/analysis_model_NDQ.png?v=0.1)
- Auto Correlation of forecasting signal (model prediction)
![Auto Correlation](http://fintech.msra.cn/images_v070/analysis/analysis_model_auto_correlation.png?v=0.1)
![Auto Correlation](http://fintech.msra.cn/images_v060/analysis/analysis_model_auto_correlation.png?v=0.1)
- Portfolio analysis
- Backtest return
![Report](http://fintech.msra.cn/images_v070/analysis/report.png?v=0.1)
![Report](http://fintech.msra.cn/images_v060/analysis/report.png?v=0.1)
<!--
- Score IC
![Score IC](docs/_static/img/score_ic.png)
@@ -275,25 +251,21 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
The automatic workflow may not suit the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.
# [Quant Model (Paper) Zoo](examples/benchmarks)
# [Quant Model Zoo](examples/benchmarks)
Here is a list of models built on `Qlib`.
- [GBDT based on XGBoost (Tianqi Chen, et al. KDD 2016)](qlib/contrib/model/xgboost.py)
- [GBDT based on LightGBM (Guolin Ke, et al. NIPS 2017)](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. NIPS 2018)](qlib/contrib/model/catboost_model.py)
- [GBDT based on XGBoost (Tianqi Chen, et al. 2016)](qlib/contrib/model/xgboost.py)
- [GBDT based on LightGBM (Guolin Ke, et al. 2017)](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. 2017)](qlib/contrib/model/catboost_model.py)
- [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
- [LSTM based on pytorch (Sepp Hochreiter, et al. Neural omputation 1997)](qlib/contrib/model/pytorch_lstm.py)
- [LSTM based on pytorch (Sepp Hochreiter, et al. 1997)](qlib/contrib/model/pytorch_lstm.py)
- [GRU based on pytorch (Kyunghyun Cho, et al. 2014)](qlib/contrib/model/pytorch_gru.py)
- [ALSTM based on pytorch (Yao Qin, et al. IJCAI 2017)](qlib/contrib/model/pytorch_alstm.py)
- [ALSTM based on pytorch (Yao Qin, et al. 2017)](qlib/contrib/model/pytorch_alstm.py)
- [GATs based on pytorch (Petar Velickovic, et al. 2017)](qlib/contrib/model/pytorch_gats.py)
- [SFM based on pytorch (Liheng Zhang, et al. KDD 2017)](qlib/contrib/model/pytorch_sfm.py)
- [TFT based on tensorflow (Bryan Lim, et al. International Journal of Forecasting 2019)](examples/benchmarks/TFT/tft.py)
- [TabNet based on pytorch (Sercan O. Arik, et al. AAAI 2019)](qlib/contrib/model/pytorch_tabnet.py)
- [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. ICDM 2020)](qlib/contrib/model/double_ensemble.py)
- [TCTS based on pytorch (Xueqing Wu, et al. ICML 2021)](qlib/contrib/model/pytorch_tcts.py)
- [Transformer based on pytorch (Ashish Vaswani, et al. NeurIPS 2017)](qlib/contrib/model/pytorch_transformer.py)
- [Localformer based on pytorch (Juyong Jiang, et al.)](qlib/contrib/model/pytorch_localformer.py)
- [TRA based on pytorch (Hengxu, Dong, et al. KDD 2021)](qlib/contrib/model/pytorch_tra.py)
- [SFM based on pytorch (Liheng Zhang, et al. 2017)](qlib/contrib/model/pytorch_sfm.py)
- [TFT based on tensorflow (Bryan Lim, et al. 2019)](examples/benchmarks/TFT/tft.py)
- [TabNet based on pytorch (Sercan O. Arik, et al. 2019)](qlib/contrib/model/pytorch_tabnet.py)
- [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. 2020)](qlib/contrib/model/double_ensemble.py)
Your PR of new Quant models is highly welcomed.
@@ -306,17 +278,16 @@ All the models listed above are runnable with ``Qlib``. Users can find the confi
- Users can use the tool `qrun` mentioned above to run a model's workflow based from a config file.
- Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py run --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
- **NOTE**: Each baseline has different environment dependencies, please make sure that your python version aligns with the requirements(e.g. TFT only supports Python 3.6~3.7 due to the limitation of `tensorflow==1.15.0`)
- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` arguments can take any number of models listed above(the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
## Run multiple models
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only support *Linux* for now. Other OS will be supported in the future. Besides, it doesn't support parallel running the same model for multiple times as well, and this will be fixed in the future development too.)
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only support *Linux* for now. Other OS will be supported in the future. Besides, it doesn't support parrallel running the same model for multiple times as well, and this will be fixed in the future development too.)
The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.
Here is an example of running all the models for 10 iterations:
```python
python run_all_model.py run 10
python run_all_model.py 10
```
It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
@@ -375,7 +346,9 @@ Such overheads greatly slow down the data loading process.
Qlib data are stored in a compact format, which is efficient to be combined into arrays for scientific computation.
# Related Reports
- [【华泰金工林晓明团队】图神经网络选股与Qlib实践——华泰人工智能系列之四十二](https://mp.weixin.qq.com/s/w5fDB6oAv9dO6vlhf1kmhA)
- [Guide To Qlib: Microsofts AI Investment Platform](https://analyticsindiamag.com/qlib/)
- [【华泰金工林晓明团队】微软AI量化投资平台Qlib体验——华泰人工智能系列之四十](https://mp.weixin.qq.com/s/Brcd7im4NibJOJzZfMn6tQ)
- [微软也搞AI量化平台还是开源的](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ)
- [微矿Qlib业内首个AI量化投资开源平台](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ)
@@ -388,21 +361,11 @@ Qlib data are stored in a compact format, which is efficient to be combined into
Join IM discussion groups:
|[Gitter](https://gitter.im/Microsoft/qlib)|
|----|
|![image](http://fintech.msra.cn/images_v070/qrcode/gitter_qr.png)|
|![image](http://fintech.msra.cn/images_v060/qrcode/gitter_qr.png)|
# Contributing
This project welcomes contributions and suggestions.
**Here are some
[code standards](docs/developer/code_standard.rst) when you submit a pull request.**
If you want to contribute to Qlib's document, you can follow the steps in the figure below.
<p align="center">
<img src="https://github.com/demon143/qlib/blob/main/docs/_static/img/change%20doc.gif" />
</p>
Most contributions require you to agree to a
This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the right to use your contribution. For details, visit https://cla.opensource.microsoft.com.

View File

@@ -1 +0,0 @@
0.7.2.99

View File

@@ -97,57 +97,4 @@ Also, feel free to post a new issue in our GitHub repository. We always check ea
python setup.py build_ext --inplace
- If the error occurs when importing ``qlib`` package with command ``python`` , users need to change the running directory to ensure that the script does not run in the project directory.
4. BadNamespaceError: / is not a connected namespace
------------------------------------------------------------------------------------------------------------------------------------
.. code-block:: python
File "qlib_online.py", line 35, in <module>
cal = D.calendar()
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 973, in calendar
return Cal.calendar(start_time, end_time, freq, future=future)
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 798, in calendar
self.conn.send_request(
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\client.py", line 101, in send_request
self.sio.emit(request_type + "_request", request_content)
File "G:\apps\miniconda\envs\qlib\lib\site-packages\python_socketio-5.3.0-py3.8.egg\socketio\client.py", line 369, in emit
raise exceptions.BadNamespaceError(
BadNamespaceError: / is not a connected namespace.
- The version of ``python-socketio`` in qlib needs to be the same as the version of ``python-socketio`` in qlib-server:
.. code-block:: bash
pip install -U python-socketio==<qlib-server python-socketio version>
5. TypeError: send() got an unexpected keyword argument 'binary'
------------------------------------------------------------------------------------------------------------------------------------
.. code-block:: python
File "qlib_online.py", line 35, in <module>
cal = D.calendar()
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 973, in calendar
return Cal.calendar(start_time, end_time, freq, future=future)
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 798, in calendar
self.conn.send_request(
File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\client.py", line 101, in send_request
self.sio.emit(request_type + "_request", request_content)
File "G:\apps\miniconda\envs\qlib\lib\site-packages\socketio\client.py", line 263, in emit
self._send_packet(packet.Packet(packet.EVENT, namespace=namespace,
File "G:\apps\miniconda\envs\qlib\lib\site-packages\socketio\client.py", line 339, in _send_packet
self.eio.send(ep, binary=binary)
TypeError: send() got an unexpected keyword argument 'binary'
- The ``python-engineio`` version needs to be compatible with the ``python-socketio`` version, reference: https://github.com/miguelgrinberg/python-socketio#version-compatibility
.. code-block:: bash
pip install -U python-engineio==<compatible python-socketio version>
# or
pip install -U python-socketio==3.1.2 python-engineio==3.13.2
- If the error occurs when importing ``qlib`` package with command ``python`` , users need to change the running directory to ensure that the script does not run in the project directory.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 144 KiB

After

Width:  |  Height:  |  Size: 160 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 208 KiB

After

Width:  |  Height:  |  Size: 271 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -30,7 +30,7 @@ The simple example of the default strategy is as follows.
from qlib.contrib.evaluate import backtest
# pred_score is the prediction score
report, positions = backtest(pred_score, topk=50, n_drop=0.5, limit_threshold=0.0095)
report, positions = backtest(pred_score, topk=50, n_drop=0.5, verbose=False, limit_threshold=0.0095)
To know more about backtesting with a specific ``Strategy``, please refer to `Portfolio Strategy <strategy.html>`_.

View File

@@ -67,34 +67,6 @@ After running the above command, users can find china-stock and us-stock data in
When ``Qlib`` is initialized with this dataset, users could build and evaluate their own models with it. Please refer to `Initialization <../start/initialization.html>`_ for more details.
Automatic update of daily frequency data
----------------------------------------
**It is recommended that users update the data manually once (\-\-trading_date 2021-05-25) and then set it to update automatically.**
For more information refer to: `yahoo collector <https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#Automatic-update-of-daily-frequency-data>`_
- Automatic update of data to the "qlib" directory each trading day(Linux)
- use *crontab*: `crontab -e`
- set up timed tasks:
.. code-block:: bash
* * * * 1-5 python <script path> update_data_to_bin --qlib_data_1d_dir <user data dir>
- **script path**: *scripts/data_collector/yahoo/collector.py*
- Manual update of data
.. code-block:: bash
python scripts/data_collector/yahoo/collector.py update_data_to_bin --qlib_data_1d_dir <user data dir> --trading_date <start date> --end_date <end date>
- *trading_date*: start of trading day
- *end_date*: end of trading day(not included)
Converting CSV Format into Qlib Format
-------------------------------------------
@@ -179,7 +151,6 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_
In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.
If you want to use your own alpha-factor which can't be calculate by OCHLV, like PE, EPS and so on, you could add it to the CSV files with OHCLV together and then dump it to the Qlib format data.
Stock Pool (Market)
--------------------------------

View File

@@ -1,120 +0,0 @@
.. _highfreq:
============================================
Design of hierarchical order execution framework
============================================
.. currentmodule:: qlib
Introduction
===================
In order to support reinforcement learning algorithms for high-frequency trading, a corresponding framework is required. None of the publicly available high-frequency trading frameworks now consider multi-layer trading mechanisms, and the currently designed algorithms cannot directly use existing frameworks.
In addition to supporting the basic intraday multi-layer trading, the linkage with the day-ahead strategy is also a factor that affects the performance evaluation of the strategy. Different day strategies generate different order distributions and different patterns on different stocks. To verify that high-frequency trading strategies perform well on real trading orders, it is necessary to support day-frequency and high-frequency multi-level linkage trading. In addition to more accurate backtesting of high-frequency trading algorithms, if the distribution of day-frequency orders is considered when training a high-frequency trading model, the algorithm can also be optimized more for product-specific day-frequency orders.
Therefore, innovation in the high-frequency trading framework is necessary to solve the various problems mentioned above, for which we designed a hierarchical order execution framework that can link daily-frequency and intra-day trading at different granularities.
.. image:: ../_static/img/framework.svg
The design of the framework is shown in the figure above. At each layer consists of Trading Agent and Execution Env. The Trading Agent has its own data processing module (Information Extractor), forecasting module (Forecast Model) and decision generator (Decision Generator). The trading algorithm generates the corresponding decisions by the Decision Generator based on the forecast signals output by the Forecast Module, and the decisions generated by the trading algorithm are passed to the Execution Env, which returns the execution results. Here the frequency of trading algorithm, decision content and execution environment can be customized by users (e.g. intra-day trading, daily-frequency trading, weekly-frequency trading), and the execution environment can be nested with finer-grained trading algorithm and execution environment inside (i.e. sub-workflow in the figure, e.g. daily-frequency orders can be turned into finer-grained decisions by splitting orders within the day). The hierarchical order execution framework is user-defined in terms of hierarchy division and decision frequency, making it easy for users to explore the effects of combining different levels of trading algorithms and breaking down the barriers between different levels of trading algorithm optimization.
In addition to the innovation in the framework, the hierarchical order execution framework also takes into account various details of the real backtesting environment, minimizing the differences with the final real environment as much as possible. At the same time, the framework is designed to unify the interface between online and offline (e.g. data pre-processing level supports using the same set of code to process both offline and online data) to reduce the cost of strategy go-live as much as possible.
Prepare Data
===================
.. _data:: ../../examples/highfreq/README.md
Example
===========================
Here is an example of highfreq execution.
.. code-block:: python
import qlib
# init qlib
provider_uri_day = "~/.qlib/qlib_data/cn_data"
provider_uri_1min = "~/.qlib/qlib_data/cn_data_1min"
provider_uri_map = {"1min": provider_uri_1min, "day": provider_uri_day}
qlib.init(provider_uri=provider_uri_day, expression_cache=None, dataset_cache=None)
# data freq and backtest time
freq = "1min"
inst_list = D.list_instruments(D.instruments("all"), as_list=True)
start_time = "2020-01-01"
start_time = "2020-01-31"
When initializing qlib, if the default data is used, then both daily and minute frequency data need to be passed in.
.. code-block:: python
# random order strategy config
strategy_config = {
"class": "RandomOrderStrategy",
"module_path": "qlib.contrib.strategy.rule_strategy",
"kwargs": {
"trade_range": TradeRangeByTime("9:30", "15:00"),
"sample_ratio": 1.0,
"volume_ratio": 0.01,
"market": market,
},
}
.. code-block:: python
# backtest config
backtest_config = {
"start_time": start_time,
"end_time": end_time,
"account": 100000000,
"benchmark": None,
"exchange_kwargs": {
"freq": freq,
"limit_threshold": 0.095,
"deal_price": "close",
"open_cost": 0.0005,
"close_cost": 0.0015,
"min_cost": 5,
"codes": market,
},
"pos_type": "InfPosition", # Position with infinitive position
}
please refer to "../../qlib/backtest".
.. code-block:: python
# excutor config
executor_config = {
"class": "NestedExecutor",
"module_path": "qlib.backtest.executor",
"kwargs": {
"time_per_step": "day",
"inner_executor": {
"class": "SimulatorExecutor",
"module_path": "qlib.backtest.executor",
"kwargs": {
"time_per_step": freq,
"generate_portfolio_metrics": True,
"verbose": False,
# "verbose": True,
"indicator_config": {
"show_indicator": False,
},
},
},
"inner_strategy": {
"class": "TWAPStrategy",
"module_path": "qlib.contrib.strategy.rule_strategy",
},
"track_data": True,
"generate_portfolio_metrics": True,
"indicator_config": {
"show_indicator": True,
},
},
}
NestedExecutor represents not the innermost layer, the initialization parameters should contain inner_executor and inner_strategy. simulatorExecutor represents the current excutor is the innermost layer, the innermost strategy used here is the TWAP strategy, the framework currently also supports the VWAP strategy
.. code-block:: python
# backtest
portfolio_metrics_dict, indicator_dict = backtest(executor=executor_config, strategy=strategy_config, **backtest_config)
The metrics of backtest are included in the portfolio_metrics_dict and indicator_dict.

View File

@@ -21,8 +21,6 @@ which including `Online Manager <#Online Manager>`_, `Online Strategy <#Online S
If you have many models or `task` needs to be managed, please consider `Task Management <../advanced/task_management.html>`_.
The `examples <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`_ are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``.
**NOTE**: User should keep his data source updated to support online serving. For example, Qlib provides `a batch of scripts <https://github.com/microsoft/qlib/blob/main/scripts/data_collector/yahoo/README.md#automatic-update-of-daily-frequency-datafrom-yahoo-finance>`_ to help users update Yahoo daily data.
Online Manager
=============
@@ -45,4 +43,4 @@ Updater
=============
.. automodule:: qlib.workflow.online.update
:members:
:members:

View File

@@ -123,6 +123,7 @@ Here is a simple exampke of what is done in ``PortAnaRecord``, which users can r
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,

View File

@@ -93,6 +93,7 @@ Usage & Example
"n_drop": 5,
}
BACKTEST_CONFIG = {
"verbose": False,
"limit_threshold": 0.095,
"account": 100000000,
"benchmark": BENCHMARK,

View File

@@ -53,10 +53,8 @@ Below is a typical config file of ``qrun``.
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
@@ -92,12 +90,12 @@ Below is a typical config file of ``qrun``.
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
module_path: qlib.workflow.record_temp
kwargs: {}
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
After saving the config into `configuration.yaml`, users could start the workflow and test their ideas with a single command below.
@@ -144,7 +142,7 @@ The meaning of each field is as follows:
- `region`
- If `region` == "us", ``Qlib`` will be initialized in US-stock mode.
- If `region` == "cn", ``Qlib`` will be initialized in China-stock mode.
- If `region` == "cn", ``Qlib`` will be initialized in china-stock mode.
.. note::
@@ -243,10 +241,8 @@ The following script is the configuration of `backtest` and the `strategy` used
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark

View File

@@ -1,22 +0,0 @@
.. _code_standard:
=================================
Code Standard
=================================
Docstring
=================================
Please use the `Numpydoc Style <https://stackoverflow.com/a/24385103>`_.
Continuous Integration
=================================
Continuous Integration (CI) tools help you stick to the quality standards by running tests every time you push a new commit and reporting the results to a pull request.
When you submit a PR request, you can check whether your code passes the CI tests in the "check" section at the bottom of the web page.
A common error is the mixed use of space and tab. You can fix the bug by inputing the following code in the command line.
.. code-block:: python
pip install black
python -m black . -l 120

View File

@@ -93,6 +93,7 @@ We write a simple configuration example as following,
fend_time: 2018-12-11
backtest:
normal_backtest_args:
verbose: False
limit_threshold: 0.095
account: 500000
benchmark: SH000905
@@ -305,6 +306,7 @@ About the data and backtest
fend_time: 2018-12-11
backtest:
normal_backtest_args:
verbose: False
limit_threshold: 0.095
account: 500000
benchmark: SH000905

View File

@@ -15,7 +15,7 @@ With ``Qlib``, users can easily try their ideas to create better Quant investmen
Framework
===================
.. image:: ../_static/img/framework.svg
.. image:: ../_static/img/framework.png
:align: center

View File

@@ -241,7 +241,6 @@ Online Tool
.. automodule:: qlib.workflow.online.utils
:members:
RecordUpdater
--------------------
.. automodule:: qlib.workflow.online.update
@@ -258,4 +257,4 @@ Serializable
:members:

View File

@@ -77,8 +77,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
})
- `mongo`
Type: dict, optional parameter, the setting of `MongoDB <https://www.mongodb.com/>`_ which will be used in some features such as `Task Management <../advanced/task_management.html>`_, with high performance and clustered processing.
Users need to follow the steps in `installation <https://www.mongodb.com/try/download/community>`_ to install MongoDB firstly and then access it via a URI.
Users can access mongodb with credential by setting "task_url" to a string like `"mongodb://%s:%s@%s" % (user, pwd, host + ":" + port)`.
Users need finished `installation <https://www.mongodb.com/try/download/community>`_ firstly, and run it in a fixed URL.
.. code-block:: Python

View File

@@ -34,24 +34,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: ALSTM
@@ -86,9 +81,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: ALSTM
@@ -76,9 +71,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -87,4 +80,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -12,24 +12,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: CatBoostModel
@@ -58,9 +53,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -19,24 +19,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: CatBoostModel
@@ -65,9 +60,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -12,24 +12,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DEnsembleModel
@@ -80,18 +75,16 @@ task:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
kwargs:
config: *port_analysis_config

View File

@@ -19,24 +19,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DEnsembleModel
@@ -87,12 +82,10 @@ task:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -100,5 +93,5 @@ task:
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
kwargs:
config: *port_analysis_config

View File

@@ -33,24 +33,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GATs
@@ -66,6 +61,7 @@ task:
metric: loss
loss: mse
base_model: LSTM
with_pretrain: True
model_path: "benchmarks/LSTM/csi300_lstm_ts.pkl"
GPU: 0
dataset:
@@ -84,9 +80,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -95,4 +89,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GATs
@@ -59,6 +54,7 @@ task:
metric: loss
loss: mse
base_model: LSTM
with_pretrain: True
model_path: "benchmarks/LSTM/model_lstm_csi300.pkl"
GPU: 0
dataset:
@@ -76,9 +72,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -87,4 +81,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -34,24 +34,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GRU
@@ -85,9 +80,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: GRU
@@ -75,9 +70,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -86,4 +79,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -34,24 +34,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LSTM
@@ -85,9 +80,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LSTM
@@ -75,9 +70,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -86,4 +79,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -1,18 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import pandas as pd
from qlib.data.inst_processor import InstProcessor
from qlib.utils.resam import resam_calendar
class ResampleNProcessor(InstProcessor):
def __init__(self, target_frq: str, **kwargs):
self.target_frq = target_frq
def __call__(self, df: pd.DataFrame, *args, **kwargs):
df.index = pd.to_datetime(df.index)
res_index = resam_calendar(df.index, "1min", self.target_frq)
df = df.resample(self.target_frq).last().reindex(res_index)
return df

View File

@@ -1,16 +0,0 @@
import datetime
import pandas as pd
from qlib.data.inst_processor import InstProcessor
class Resample1minProcessor(InstProcessor):
def __init__(self, hour: int, minute: int, **kwargs):
self.hour = hour
self.minute = minute
def __call__(self, df: pd.DataFrame, *args, **kwargs):
df.index = pd.to_datetime(df.index)
df = df.loc[df.index.time == datetime.time(self.hour, self.minute)]
df.index = df.index.normalize()
return df

View File

@@ -1,135 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import pandas as pd
from qlib.data.dataset.loader import QlibDataLoader
from qlib.contrib.data.handler import DataHandlerLP, _DEFAULT_LEARN_PROCESSORS, check_transform_proc
class Avg15minLoader(QlibDataLoader):
def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
df = super(Avg15minLoader, self).load(instruments, start_time, end_time)
if self.is_group:
# feature_day(day freq) and feature_15min(1min freq, Average every 15 minutes) renamed feature
df.columns = df.columns.map(lambda x: ("feature", x[1]) if x[0].startswith("feature") else x)
return df
class Avg15minHandler(DataHandlerLP):
def __init__(
self,
instruments="csi500",
start_time=None,
end_time=None,
freq="day",
infer_processors=[],
learn_processors=_DEFAULT_LEARN_PROCESSORS,
fit_start_time=None,
fit_end_time=None,
process_type=DataHandlerLP.PTYPE_A,
filter_pipe=None,
inst_processor=None,
**kwargs,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
data_loader = Avg15minLoader(
config=self.loader_config(), filter_pipe=filter_pipe, freq=freq, inst_processor=inst_processor
)
super().__init__(
instruments=instruments,
start_time=start_time,
end_time=end_time,
data_loader=data_loader,
infer_processors=infer_processors,
learn_processors=learn_processors,
process_type=process_type,
)
def loader_config(self):
# Results for dataset: df: pd.DataFrame
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
# freq == day:
# close0, open0, low0, high0, volume0, vwap0
# freq == 1min:
# close1, ..., close16, ..., vwap1, ..., vwap16
# df.index.name == ["datetime", "instrument"]: pd.MultiIndex
# Example:
# feature ... label
# close0 open0 low0 ... vwap1 vwap16 LABEL0
# datetime instrument ...
# 2020-10-09 SH600000 11.794546 11.819587 11.769505 ... NaN NaN -0.005214
# 2020-10-15 SH600000 12.044961 11.944795 11.932274 ... NaN NaN -0.007202
# ... ... ... ... ... ... ... ...
# 2021-05-28 SZ300676 6.369684 6.495406 6.306568 ... NaN NaN -0.001321
# 2021-05-31 SZ300676 6.601626 6.465643 6.465130 ... NaN NaN -0.023428
# features day: len(columns) == 6, freq = day
# $close is the closing price of the current trading day:
# if the user needs to get the `close` before the last T days, use Ref($close, T-1), for example:
# $close Ref($close, 1) Ref($close, 2) Ref($close, 3) Ref($close, 4)
# instrument datetime
# SH600519 2021-06-01 244.271530
# 2021-06-02 242.205917 244.271530
# 2021-06-03 242.229889 242.205917 244.271530
# 2021-06-04 245.421524 242.229889 242.205917 244.271530
# 2021-06-07 247.547089 245.421524 242.229889 242.205917 244.271530
# WARNING: Ref($close, N), if N == 0, Ref($close, N) ==> $close
fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
# names: close0, open0, ..., vwap0
names = list(map(lambda x: x.strip("$") + "0", fields))
config = {"feature_day": (fields, names)}
# features 15min: len(columns) == 6 * 16, freq = 1min
# $close is the closing price of the current trading day:
# if the user gets 'close' for the i-th 15min of the last T days, use `Ref(Mean($close, 15), (T-1) * 240 + i * 15)`, for example:
# Ref(Mean($close, 15), 225) Ref(Mean($close, 15), 465) Ref(Mean($close, 15), 705)
# instrument datetime
# SH600519 2021-05-31 241.769897 243.077942 244.712997
# 2021-06-01 244.271530 241.769897 243.077942
# 2021-06-02 242.205917 244.271530 241.769897
# WARNING: Ref(Mean($close, 15), N), if N == 0, Ref(Mean($close, 15), N) ==> Mean($close, 15)
# Results of the current script:
# time: 09:00 --> 09:14, ..., 14:45 --> 14:59
# fields: Ref(Mean($close, 15), 225), ..., Mean($close, 15)
# name: close1, ..., close16
#
# Expression description: take close as an example
# Mean($close, 15) ==> df["$close"].rolling(15, min_periods=1).mean()
# Ref(Mean($close, 15), 15) ==> df["$close"].rolling(15, min_periods=1).mean().shift(15)
# NOTE: The last data of each trading day, which is the average of the i-th 15 minutes
# Average:
# Average of the i-th 15-minute period of each trading day: 1 <= i <= 250 // 16
# Avg(15minutes): Ref(Mean($close, 15), 240 - i * 15)
#
# Average of the first 15 minutes of each trading day; i = 1
# Avg(09:00 --> 09:14), df.index.loc["09:14"]: Ref(Mean($close, 15), 240- 1 * 15) ==> Ref(Mean($close, 15), 225)
# Average of the last 15 minutes of each trading day; i = 16
# Avg(14:45 --> 14:59), df.index.loc["14:59"]: Ref(Mean($close, 15), 240 - 16 * 15) ==> Ref(Mean($close, 15), 0) ==> Mean($close, 15)
# 15min resample to day
# df.resample("1d").last()
tmp_fields = []
tmp_names = []
for i, _f in enumerate(fields):
_fields = [f"Ref(Mean({_f}, 15), {j * 15})" for j in range(1, 240 // 15)]
_names = [f"{names[i][:-1]}{int(names[i][-1])+j}" for j in range(240 // 15 - 1, 0, -1)]
_fields.append(f"Mean({_f}, 15)")
_names.append(f"{names[i][:-1]}{int(names[i][-1])+240 // 15}")
tmp_fields += _fields
tmp_names += _names
config["feature_15min"] = (tmp_fields, tmp_names)
# label
config["label"] = (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
return config

View File

@@ -12,23 +12,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
@@ -58,9 +54,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -69,4 +63,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -1,86 +0,0 @@
qlib_init:
provider_uri:
day: "~/.qlib/qlib_data/cn_data"
1min: "~/.qlib/qlib_data/cn_data_1min"
region: cn
dataset_cache: null
maxtasksperchild: 1
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
# 1min closing time is 15:00:00
end_time: "2020-08-01 15:00:00"
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
freq:
label: day
feature: 1min
# with label as reference
inst_processor:
feature:
- class: Resample1minProcessor
module_path: features_sample.py
kwargs:
hour: 14
minute: 56
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -19,24 +19,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
@@ -66,9 +61,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -77,4 +70,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -27,24 +27,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
@@ -74,9 +69,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -85,4 +78,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -1,90 +0,0 @@
qlib_init:
provider_uri:
day: "~/.qlib/qlib_data/cn_data"
1min: "~/.qlib/qlib_data/cn_data_1min"
region: cn
dataset_cache: null
maxtasksperchild: null
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
# 1min closing time is 15:00:00
end_time: "2020-08-01 15:00:00"
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
freq:
label: day
feature_15min: 1min
feature_day: day
# with label as reference
inst_processor:
feature_15min:
- class: ResampleNProcessor
module_path: features_resample_N.py
kwargs:
target_frq: 1d
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Avg15minHandler
module_path: multi_freq_handler.py
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LinearModel
@@ -62,18 +57,16 @@ task:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
kwargs:
ana_long_short: True
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
kwargs:
config: *port_analysis_config

View File

@@ -1,3 +0,0 @@
numpy==1.17.4
pandas==1.1.2
torch==1.2.0

View File

@@ -1,89 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: FilterCol
kwargs:
fields_group: feature
col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10",
"ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5",
"RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
]
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LocalformerModel
module_path: qlib.contrib.model.pytorch_localformer_ts
kwargs:
seed: 0
n_jobs: 20
dataset:
class: TSDatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
step_len: 20
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,80 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LocalformerModel
module_path: qlib.contrib.model.pytorch_localformer
kwargs:
d_feat: 6
seed: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -39,24 +39,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
@@ -88,9 +83,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -99,4 +92,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -27,24 +27,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
@@ -75,9 +70,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
@@ -86,4 +79,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config

View File

@@ -1,60 +1,39 @@
# Benchmarks Performance
Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs with different random seeds.
Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` dataset with China's A shared-stock & CSI300 data respectively. The values of each metric are the mean and std calculated based on 20 runs.
The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
<!--
> If you need to reproduce the results below, please use the **v1** dataset: `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1d --region cn --version v1`
>
> In the new version of qlib, the default dataset is **v2**. Since the data is collected from the YahooFinance API (which is not very stable), the results of *v2* and *v1* may differ -->
> NOTE:
> The backtest start from 0.8.0 is quite different from previous version. Please check out the changelog for the difference.
## Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| TabNet(Sercan O. Arik, et al.) | Alpha158 | 0.0204±0.01 | 0.1554±0.07 | 0.0333±0.00 | 0.2552±0.05 | 0.0227±0.04 | 0.3676±0.54 | -0.1089±0.08 |
| Transformer(Ashish Vaswani, et al.) | Alpha158 | 0.0264±0.00 | 0.2053±0.02 | 0.0407±0.00 | 0.3273±0.02 | 0.0273±0.02 | 0.3970±0.26 | -0.1101±0.02 |
| GRU(Kyunghyun Cho, et al.) | Alpha158(with selected 20 features) | 0.0315±0.00 | 0.2450±0.04 | 0.0428±0.00 | 0.3440±0.03 | 0.0344±0.02 | 0.5160±0.25 | -0.1017±0.02 |
| LSTM(Sepp Hochreiter, et al.) | Alpha158(with selected 20 features) | 0.0318±0.00 | 0.2367±0.04 | 0.0435±0.00 | 0.3389±0.03 | 0.0381±0.03 | 0.5561±0.46 | -0.1207±0.04 |
| Localformer(Juyong Jiang, et al.) | Alpha158 | 0.0356±0.00 | 0.2756±0.03 | 0.0468±0.00 | 0.3784±0.03 | 0.0438±0.02 | 0.6600±0.33 | -0.0952±0.02 |
| SFM(Liheng Zhang, et al.) | Alpha158 | 0.0379±0.00 | 0.2959±0.04 | 0.0464±0.00 | 0.3825±0.04 | 0.0465±0.02 | 0.5672±0.29 | -0.1282±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha158(with selected 20 features) | 0.0362±0.01 | 0.2789±0.06 | 0.0463±0.01 | 0.3661±0.05 | 0.0470±0.03 | 0.6992±0.47 | -0.1072±0.03 |
| GATs (Petar Velickovic, et al.) | Alpha158(with selected 20 features) | 0.0349±0.00 | 0.2511±0.01 | 0.0462±0.00 | 0.3564±0.01 | 0.0497±0.01 | 0.7338±0.19 | -0.0777±0.02 |
| TRA(Hengxu Lin, et al.) | Alpha158(with selected 20 features) | 0.0404±0.00 | 0.3197±0.05 | 0.0490±0.00 | 0.4047±0.04 | 0.0649±0.02 | 1.0091±0.30 | -0.0860±0.02 |
| Linear | Alpha158 | 0.0397±0.00 | 0.3000±0.00 | 0.0472±0.00 | 0.3531±0.00 | 0.0692±0.00 | 0.9209±0.00 | -0.1509±0.00 |
| TRA(Hengxu Lin, et al.) | Alpha158 | 0.0440±0.00 | 0.3535±0.05 | 0.0540±0.00 | 0.4451±0.03 | 0.0718±0.02 | 1.0835±0.35 | -0.0760±0.02 |
| CatBoost(Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0481±0.00 | 0.3366±0.00 | 0.0454±0.00 | 0.3311±0.00 | 0.0765±0.00 | 0.8032±0.01 | -0.1092±0.00 |
| XGBoost(Tianqi Chen, et al.) | Alpha158 | 0.0498±0.00 | 0.3779±0.00 | 0.0505±0.00 | 0.4131±0.00 | 0.0780±0.00 | 0.9070±0.00 | -0.1168±0.00 |
| TFT (Bryan Lim, et al.) | Alpha158(with selected 20 features) | 0.0358±0.00 | 0.2160±0.03 | 0.0116±0.01 | 0.0720±0.03 | 0.0847±0.02 | 0.8131±0.19 | -0.1824±0.03 |
| MLP | Alpha158 | 0.0376±0.00 | 0.2846±0.02 | 0.0429±0.00 | 0.3220±0.01 | 0.0895±0.02 | 1.1408±0.23 | -0.1103±0.02 |
| LightGBM(Guolin Ke, et al.) | Alpha158 | 0.0448±0.00 | 0.3660±0.00 | 0.0469±0.00 | 0.3877±0.00 | 0.0901±0.00 | 1.0164±0.00 | -0.1038±0.00 |
| DoubleEnsemble(Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4340±0.00 | 0.0523±0.00 | 0.4284±0.01 | 0.1168±0.01 | 1.3384±0.12 | -0.1036±0.01 |
## Alpha360 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|---|---|---|---|---|---|---|---|---|
| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 |
| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 |
| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 |
| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 |
| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 |
| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 |
| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|-------------------------------------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| Transformer(Ashish Vaswani, et al.) | Alpha360 | 0.0114±0.00 | 0.0716±0.03 | 0.0327±0.00 | 0.2248±0.02 | -0.0270±0.03 | -0.3378±0.37 | -0.1653±0.05 |
| TabNet(Sercan O. Arik, et al.) | Alpha360 | 0.0099±0.00 | 0.0593±0.00 | 0.0290±0.00 | 0.1887±0.00 | -0.0369±0.00 | -0.3892±0.00 | -0.2145±0.00 |
| MLP | Alpha360 | 0.0273±0.00 | 0.1870±0.02 | 0.0396±0.00 | 0.2910±0.02 | 0.0029±0.02 | 0.0274±0.23 | -0.1385±0.03 |
| Localformer(Juyong Jiang, et al.) | Alpha360 | 0.0404±0.00 | 0.2932±0.04 | 0.0542±0.00 | 0.4110±0.03 | 0.0246±0.02 | 0.3211±0.21 | -0.1095±0.02 |
| CatBoost((Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0378±0.00 | 0.2714±0.00 | 0.0467±0.00 | 0.3659±0.00 | 0.0292±0.00 | 0.3781±0.00 | -0.0862±0.00 |
| XGBoost(Tianqi Chen, et al.) | Alpha360 | 0.0394±0.00 | 0.2909±0.00 | 0.0448±0.00 | 0.3679±0.00 | 0.0344±0.00 | 0.4527±0.02 | -0.1004±0.00 |
| DoubleEnsemble(Chuheng Zhang, et al.) | Alpha360 | 0.0404±0.00 | 0.3023±0.00 | 0.0495±0.00 | 0.3898±0.00 | 0.0468±0.01 | 0.6302±0.20 | -0.0860±0.01 |
| LightGBM(Guolin Ke, et al.) | Alpha360 | 0.0400±0.00 | 0.3037±0.00 | 0.0499±0.00 | 0.4042±0.00 | 0.0558±0.00 | 0.7632±0.00 | -0.0659±0.00 |
| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0497±0.00 | 0.3829±0.04 | 0.0599±0.00 | 0.4736±0.03 | 0.0626±0.02 | 0.8651±0.31 | -0.0994±0.03 |
| LSTM(Sepp Hochreiter, et al.) | Alpha360 | 0.0448±0.00 | 0.3474±0.04 | 0.0549±0.00 | 0.4366±0.03 | 0.0647±0.03 | 0.8963±0.39 | -0.0875±0.02 |
| GRU(Kyunghyun Cho, et al.) | Alpha360 | 0.0493±0.00 | 0.3772±0.04 | 0.0584±0.00 | 0.4638±0.03 | 0.0720±0.02 | 0.9730±0.33 | -0.0821±0.02 |
| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0476±0.00 | 0.3508±0.02 | 0.0598±0.00 | 0.4604±0.01 | 0.0824±0.02 | 1.1079±0.26 | -0.0894±0.03 |
| TCTS(Xueqing Wu, et al.) | Alpha360 | 0.0508±0.00 | 0.3931±0.04 | 0.0599±0.00 | 0.4756±0.03 | 0.0893±0.03 | 1.2256±0.36 | -0.0857±0.02 |
| TRA(Hengxu Lin, et al.) | Alpha360 | 0.0485±0.00 | 0.3787±0.03 | 0.0587±0.00 | 0.4756±0.03 | 0.0920±0.03 | 1.2789±0.42 | -0.0834±0.02 |
## Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|---|---|---|---|---|---|---|---|---|
| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 |
| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 |
| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
- The selected 20 features are based on the feature importance of a lightgbm-based model.
- The base model of DoubleEnsemble is LGBM.
- The base model of TCTS is GRU.

View File

@@ -1,4 +1,4 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0
torch==1.7.0

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: SFM
@@ -78,9 +73,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -1,38 +0,0 @@
# Temporally Correlated Task Scheduling for Sequence Learning
### Background
Sequence learning has attracted much research attention from the machine learning community in recent years. In many applications, a sequence learning task is usually associated with multiple temporally correlated auxiliary tasks, which are different in terms of how much input information to use or which future step to predict. In stock trend forecasting, as demonstrated in Figure1, one can predict the price of a stock in different future days (e.g., tomorrow, the day after tomorrow). In this paper, we propose a framework to make use of those temporally correlated tasks to help each other.
### Method
Given that there are usually multiple temporally correlated tasks, the key challenge lies in which tasks to use and when to use them in the training process. This work introduces a learnable task scheduler for sequence learning, which adaptively selects temporally correlated tasks during the training process. The scheduler accesses the model status and the current training data (e.g., in the current minibatch) and selects the best auxiliary task to help the training of the main task. The scheduler and the model for the main task are jointly trained through bi-level optimization: the scheduler is trained to maximize the validation performance of the model, and the model is trained to minimize the training loss guided by the scheduler. The process is demonstrated in Figure2.
<p align="center">
<img src="workflow.png"/>
</p>
At step <img src="https://latex.codecogs.com/png.latex?s" title="s" />, with training data <img src="https://latex.codecogs.com/png.latex?x_s,y_s" title="x_s,y_s" />, the scheduler <img src="https://latex.codecogs.com/png.latex?\varphi" title="\varphi" /> chooses a suitable task <img src="https://latex.codecogs.com/png.latex?T_{i_s}" title="T_{i_s}" /> (green solid lines) to update the model <img src="https://latex.codecogs.com/png.latex?f" title="f" /> (blue solid lines). After <img src="https://latex.codecogs.com/png.latex?S" title="S" /> steps, we evaluate the model <img src="https://latex.codecogs.com/png.latex?f" title="f" /> on the validation set and update the scheduler <img src="https://latex.codecogs.com/png.latex?\varphi" title="\varphi" /> (green dashed lines).
### Experiments
Due to different data versions and different Qlib versions, the original data and data preprocessing methods of the experimental settings in the paper are different from those experimental settings in the existing Qlib version. Therefore, we provide two versions of the code according to the two kinds of settings, 1) the [code](https://github.com/lwwang1995/tcts) that can be used to reproduce the experimental results and 2) the [code](https://github.com/microsoft/qlib/blob/main/qlib/contrib/model/pytorch_tcts.py) in the current Qlib baseline.
#### Setting1
* Dataset: We use the historical transaction data for 300 stocks on [CSI300](http://www.csindex.com.cn/en/indices/index-detail/000300) from 01/01/2008 to 08/01/2020. We split the data into training (01/01/2008-12/31/2013), validation (01/01/2014-12/31/2015), and test sets (01/01/2016-08/01/2020) based on the transaction time.
* The main tasks <img src="https://latex.codecogs.com/png.latex?T_k" title="T_k" /> refers to forecasting return of stock <img src="https://latex.codecogs.com/png.latex?i" title="i" /> as following,
<div align=center>
<img src="https://latex.codecogs.com/png.image?\dpi{110}&space;r_{i}^{t,k}&space;=&space;\frac{price_i^{t&plus;k}}{price_i^{t&plus;k-1}}-1" title="r_{i}^{t,k} = \frac{price_i^{t+k}}{price_i^{t+k-1}}-1" />
</div>
* Temporally correlated task sets <img src="https://latex.codecogs.com/png.latex?\mathcal{T}_k&space;=&space;\{T_1,&space;T_2,&space;...&space;,&space;T_k\}" title="\mathcal{T}_k = \{T_1, T_2, ... , T_k\}" />, in this paper, <img src="https://latex.codecogs.com/png.latex?\mathcal{T}_3" title="\mathcal{T}_3" />, <img src="https://latex.codecogs.com/png.latex?\mathcal{T}_5" title="\mathcal{T}_5" /> and <img src="https://latex.codecogs.com/png.latex?\mathcal{T}_{10}" title="\mathcal{T}_{10}" /> are used in <img src="https://latex.codecogs.com/png.latex?T_1" title="T_1" />, <img src="https://latex.codecogs.com/png.latex?T_2" title="T_2" />, and <img src="https://latex.codecogs.com/png.latex?T_3" title="T_3" />.
#### Setting2
* Dataset: We use the historical transaction data for 300 stocks on [CSI300](http://www.csindex.com.cn/en/indices/index-detail/000300) from 01/01/2008 to 08/01/2020. We split the data into training (01/01/2008-12/31/2014), validation (01/01/2015-12/31/2016), and test sets (01/01/2017-08/01/2020) based on the transaction time.
* The main tasks <img src="https://latex.codecogs.com/png.latex?T_k" title="T_k" /> refers to forecasting return of stock <img src="https://latex.codecogs.com/png.latex?i" title="i" /> as following,
<div align=center>
<img src="https://latex.codecogs.com/png.image?\dpi{110}&space;r_{i}^{t,k}&space;=&space;\frac{price_i^{t&plus;1&plus;k}}{price_i^{t&plus;1}}-1" title="r_{i}^{t,k} = \frac{price_i^{t+1+k}}{price_i^{t+1}}-1" />
</div>
* In Qlib baseline, <img src="https://latex.codecogs.com/png.latex?\mathcal{T}_3" title="\mathcal{T}_3" />, is used in <img src="https://latex.codecogs.com/png.latex?T_1" title="T_1" />.
### Experimental Result
You can find the experimental result of setting1 in the [paper](http://proceedings.mlr.press/v139/wu21e/wu21e.pdf) and the experimental result of setting2 in this [page](https://github.com/microsoft/qlib/tree/main/examples/benchmarks).

View File

@@ -1,4 +0,0 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

View File

@@ -1,98 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1",
"Ref($close, -3) / Ref($close, -1) - 1",
"Ref($close, -4) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TCTS
module_path: qlib.contrib.model.pytorch_tcts
kwargs:
d_feat: 6
hidden_size: 64
num_layers: 2
dropout: 0.3
n_epochs: 200
early_stop: 20
batch_size: 800
metric: loss
loss: mse
GPU: 0
fore_optimizer: adam
weight_optimizer: adam
output_dim: 3
fore_lr: 2e-3
weight_lr: 2e-3
steps: 3
target_label: 0
lowest_valid_performance: 0.993
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -8,7 +8,7 @@
Users can follow the ``workflow_by_code_tft.py`` to run the benchmark.
### Notes
1. Please be **aware** that this script can only support `Python 3.6 - 3.7`.
1. Please be **aware** that this script can only support `Python 3.5 - 3.8`.
2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine.
3. The model must run in GPU, or an error will be raised.
4. New datasets should be registered in ``data_formatters``, for detail please visit the source.

View File

@@ -195,8 +195,7 @@ class Alpha158Formatter(GenericDataFormatter):
for col in column_names:
if col not in {"forecast_time", "identifier"}:
# Using [col] is for aligning with the format when fitting
output[col] = self._target_scaler.inverse_transform(predictions[[col]])
output[col] = self._target_scaler.inverse_transform(predictions[col])
return output

View File

@@ -1,2 +1,3 @@
tensorflow-gpu==1.15.0
pandas==1.1.0
numpy == 1.19.4
pandas==1.1.0

View File

@@ -1,8 +1,6 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from pathlib import Path
from typing import Union
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
@@ -245,7 +243,7 @@ class TFTModel(ModelFT):
# extract_numerical_data(targets), extract_numerical_data(p90_forecast),
# 0.9)
tf.keras.backend.set_session(default_keras_session)
print("Training completed at {}.".format(dte.datetime.now()))
print("Training completed.".format(dte.datetime.now()))
# ===========================Training Process===========================
def predict(self, dataset):
@@ -291,31 +289,3 @@ class TFTModel(ModelFT):
dataset for finetuning
"""
pass
def to_pickle(self, path: Union[Path, str]):
"""
Tensorflow model can't be dumped directly.
So the data should be save seperatedly
**TODO**: Please implement the function to load the files
Parameters
----------
path : Union[Path, str]
the target path to be dumped
"""
# FIXME: implementing saving tensorflow models
# save tensorflow model
# path = Path(path)
# path.mkdir(parents=True)
# self.model.save(path)
# save qlib model wrapper
drop_attrs = ["model", "tf_graph", "sess", "data_formatter"]
orig_attr = {}
for attr in drop_attrs:
orig_attr[attr] = getattr(self, attr)
setattr(self, attr, None)
super(TFTModel, self).to_pickle(path)
for attr in drop_attrs:
setattr(self, attr, orig_attr[attr])

View File

@@ -14,24 +14,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TFTModel
@@ -51,9 +46,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -1,93 +0,0 @@
# Learning Multiple Stock Trading Patterns with Temporal Routing Adaptor and Optimal Transport
Temporal Routing Adaptor (TRA) is designed to capture multiple trading patterns in the stock market data. Please refer to [our paper](http://arxiv.org/abs/2106.12950) for more details.
If you find our work useful in your research, please cite:
```
@inproceedings{HengxuKDD2021,
author = {Hengxu Lin and Dong Zhou and Weiqing Liu and Jiang Bian},
title = {Learning Multiple Stock Trading Patterns with Temporal Routing Adaptor and Optimal Transport},
booktitle = {Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery \& Data Mining},
series = {KDD '21},
year = {2021},
publisher = {ACM},
}
@article{yang2020qlib,
title={Qlib: An AI-oriented Quantitative Investment Platform},
author={Yang, Xiao and Liu, Weiqing and Zhou, Dong and Bian, Jiang and Liu, Tie-Yan},
journal={arXiv preprint arXiv:2009.11189},
year={2020}
}
```
## Usage (Recommended)
**Update**: `TRA` has been moved to `qlib.contrib.model.pytorch_tra` to support other `Qlib` components like `qlib.workflow` and `Alpha158/Alpha360` dataset.
Please follow the official [doc](https://qlib.readthedocs.io/en/latest/component/workflow.html) to use `TRA` with `workflow`. Here we also provide several example config files:
- `workflow_config_tra_Alpha360.yaml`: running `TRA` with `Alpha360` dataset
- `workflow_config_tra_Alpha158.yaml`: running `TRA` with `Alpha158` dataset (with feature subsampling)
- `workflow_config_tra_Alpha158_full.yaml`: running `TRA` with `Alpha158` dataset (without feature subsampling)
The performances of `TRA` are reported in [Benchmarks](https://github.com/microsoft/qlib/tree/main/examples/benchmarks).
## Usage (Not Maintained)
This section is used to reproduce the results in the paper.
### Running
We attach our running scripts for the paper in `run.sh`.
And here are two ways to run the model:
* Running from scripts with default parameters
You can directly run from Qlib command `qrun`:
```
qrun configs/config_alstm.yaml
```
* Running from code with self-defined parameters
Setting different parameters is also allowed. See codes in `example.py`:
```
python example.py --config_file configs/config_alstm.yaml
```
Here we trained TRA on a pretrained backbone model. Therefore we run `*_init.yaml` before TRA's scipts.
### Results
After running the scripts, you can find result files in path `./output`:
* `info.json` - config settings and result metrics.
* `log.csv` - running logs.
* `model.bin` - the model parameter dictionary.
* `pred.pkl` - the prediction scores and output for inference.
Evaluation metrics reported in the paper:
This result is generated by qlib==0.7.1.
| Methods | MSE| MAE| IC | ICIR | AR | AV | SR | MDD |
|-------|-------|------|-----|-----|-----|-----|-----|-----|
|Linear|0.163|0.327|0.020|0.132|-3.2%|16.8%|-0.191|32.1%|
|LightGBM|0.160(0.000)|0.323(0.000)|0.041|0.292|7.8%|15.5%|0.503|25.7%|
|MLP|0.160(0.002)|0.323(0.003)|0.037|0.273|3.7%|15.3%|0.264|26.2%|
|SFM|0.159(0.001) |0.321(0.001) |0.047 |0.381 |7.1% |14.3% |0.497 |22.9%|
|ALSTM|0.158(0.001) |0.320(0.001) |0.053 |0.419 |12.3% |13.7% |0.897 |20.2%|
|Trans.|0.158(0.001) |0.322(0.001) |0.051 |0.400 |14.5% |14.2% |1.028 |22.5%|
|ALSTM+TS|0.160(0.002) |0.321(0.002) |0.039 |0.291 |6.7% |14.6% |0.480|22.3%|
|Trans.+TS|0.160(0.004) |0.324(0.005) |0.037 |0.278 |10.4% |14.7% |0.722 |23.7%|
|ALSTM+TRA(Ours)|0.157(0.000) |0.318(0.000) |0.059 |0.460 |12.4% |14.0% |0.885 |20.4%|
|Trans.+TRA(Ours)|0.157(0.000) |0.320(0.000) |0.056 |0.442 |16.1% |14.2% |1.133 |23.1%|
A more detailed demo for our experiment results in the paper can be found in `Report.ipynb`.
## Common Issues
For help or issues using TRA, please submit a GitHub issue.
Sometimes we might encounter situation where the loss is `NaN`, please check the `epsilon` parameter in the sinkhorn algorithm, adjusting the `epsilon` according to input's scale is important.

File diff suppressed because one or more lines are too long

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 256
num_layers: 2
num_heads: 2
use_attn: True
dropout: 0.1
num_states: &num_states 1
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0002
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/alstm
model_type: LSTM
model_config: *model_config
tra_config: *tra_config
lamb: 1.0
rho: 0.99
freeze_model: False
model_init_state:
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 1024

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 256
num_layers: 2
num_heads: 2
use_attn: True
dropout: 0.1
num_states: &num_states 10
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0001
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/alstm_tra
model_type: LSTM
model_config: *model_config
tra_config: *tra_config
lamb: 2.0
rho: 0.99
freeze_model: True
model_init_state: output/test/alstm_tra_init/model.bin
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 1024

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 256
num_layers: 2
num_heads: 2
use_attn: True
dropout: 0.1
num_states: &num_states 3
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0002
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/alstm_tra_init
model_type: LSTM
model_config: *model_config
tra_config: *tra_config
lamb: 1.0
rho: 0.99
freeze_model: False
model_init_state:
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 512

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 64
num_layers: 2
num_heads: 4
use_attn: False
dropout: 0.1
num_states: &num_states 1
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0002
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/transformer
model_type: Transformer
model_config: *model_config
tra_config: *tra_config
lamb: 1.0
rho: 0.99
freeze_model: False
model_init_state:
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 1024

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 64
num_layers: 2
num_heads: 4
use_attn: False
dropout: 0.1
num_states: &num_states 3
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0005
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/transformer_tra
model_type: Transformer
model_config: *model_config
tra_config: *tra_config
lamb: 1.0
rho: 0.99
freeze_model: True
model_init_state: output/test/transformer_tra_init/model.bin
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 512

View File

@@ -1,63 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
data_loader_config: &data_loader_config
class: StaticDataLoader
module_path: qlib.data.dataset.loader
kwargs:
config:
feature: data/feature.pkl
label: data/label.pkl
model_config: &model_config
input_size: 16
hidden_size: 64
num_layers: 2
num_heads: 4
use_attn: False
dropout: 0.1
num_states: &num_states 3
tra_config: &tra_config
num_states: *num_states
hidden_size: 16
tau: 1.0
src_info: LR_TPE
task:
model:
class: TRAModel
module_path: src/model.py
kwargs:
lr: 0.0002
n_epochs: 500
max_steps_per_epoch: 100
early_stop: 20
seed: 1000
logdir: output/test/transformer_tra_init
model_type: Transformer
model_config: *model_config
tra_config: *tra_config
lamb: 1.0
rho: 0.99
freeze_model: False
model_init_state:
dataset:
class: MTSDatasetH
module_path: src/dataset.py
kwargs:
handler:
class: DataHandler
module_path: qlib.data.dataset.handler
kwargs:
data_loader: *data_loader_config
segments:
train: [2007-10-30, 2016-05-27]
valid: [2016-09-26, 2018-05-29]
test: [2018-09-21, 2020-06-30]
seq_len: 60
horizon: 21
num_states: *num_states
batch_size: 512

View File

@@ -1 +0,0 @@
Data Link: https://drive.google.com/drive/folders/1fMqZYSeLyrHiWmVzygeI4sw3vp5Gt8cY?usp=sharing

View File

@@ -1,39 +0,0 @@
import argparse
import qlib
import ruamel.yaml as yaml
from qlib.utils import init_instance_by_config
def main(seed, config_file="configs/config_alstm.yaml"):
# set random seed
with open(config_file) as f:
config = yaml.safe_load(f)
# seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}"
seed_suffix = ""
config["task"]["model"]["kwargs"].update(
{"seed": seed, "logdir": config["task"]["model"]["kwargs"]["logdir"] + seed_suffix}
)
# initialize workflow
qlib.init(
provider_uri=config["qlib_init"]["provider_uri"],
region=config["qlib_init"]["region"],
)
dataset = init_instance_by_config(config["task"]["dataset"])
model = init_instance_by_config(config["task"]["model"])
# train model
model.fit(dataset)
if __name__ == "__main__":
# set params from cmd
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--seed", type=int, default=1000, help="random seed")
parser.add_argument("--config_file", type=str, default="configs/config_alstm.yaml", help="config file")
args = parser.parse_args()
main(**vars(args))

View File

@@ -1,5 +0,0 @@
pandas==1.1.2
numpy==1.17.4
scikit_learn==0.23.2
torch==1.7.0
seaborn

View File

@@ -1,29 +0,0 @@
#!/bin/bash
# we used random seed(1 1000 2000 3000 4000 5000) in our experiments
# Directly run from Qlib command `qrun`
qrun configs/config_alstm.yaml
qrun configs/config_transformer.yaml
qrun configs/config_transformer_tra_init.yaml
qrun configs/config_transformer_tra.yaml
qrun configs/config_alstm_tra_init.yaml
qrun configs/config_alstm_tra.yaml
# Or setting different parameters with example.py
python example.py --config_file configs/config_alstm.yaml
python example.py --config_file configs/config_transformer.yaml
python example.py --config_file configs/config_transformer_tra_init.yaml
python example.py --config_file configs/config_transformer_tra.yaml
python example.py --config_file configs/config_alstm_tra_init.yaml
python example.py --config_file configs/config_alstm_tra.yaml

View File

@@ -1,253 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import copy
import torch
import numpy as np
import pandas as pd
from qlib.utils import init_instance_by_config
from qlib.data.dataset import DatasetH, DataHandler
device = "cuda" if torch.cuda.is_available() else "cpu"
def _to_tensor(x):
if not isinstance(x, torch.Tensor):
return torch.tensor(x, dtype=torch.float, device=device)
return x
def _create_ts_slices(index, seq_len):
"""
create time series slices from pandas index
Args:
index (pd.MultiIndex): pandas multiindex with <instrument, datetime> order
seq_len (int): sequence length
"""
assert index.is_lexsorted(), "index should be sorted"
# number of dates for each code
sample_count_by_codes = pd.Series(0, index=index).groupby(level=0).size().values
# start_index for each code
start_index_of_codes = np.roll(np.cumsum(sample_count_by_codes), 1)
start_index_of_codes[0] = 0
# all the [start, stop) indices of features
# features btw [start, stop) are used to predict the `stop - 1` label
slices = []
for cur_loc, cur_cnt in zip(start_index_of_codes, sample_count_by_codes):
for stop in range(1, cur_cnt + 1):
end = cur_loc + stop
start = max(end - seq_len, 0)
slices.append(slice(start, end))
slices = np.array(slices)
return slices
def _get_date_parse_fn(target):
"""get date parse function
This method is used to parse date arguments as target type.
Example:
get_date_parse_fn('20120101')('2017-01-01') => '20170101'
get_date_parse_fn(20120101)('2017-01-01') => 20170101
"""
if isinstance(target, pd.Timestamp):
_fn = lambda x: pd.Timestamp(x) # Timestamp('2020-01-01')
elif isinstance(target, str) and len(target) == 8:
_fn = lambda x: str(x).replace("-", "")[:8] # '20200201'
elif isinstance(target, int):
_fn = lambda x: int(str(x).replace("-", "")[:8]) # 20200201
else:
_fn = lambda x: x
return _fn
class MTSDatasetH(DatasetH):
"""Memory Augmented Time Series Dataset
Args:
handler (DataHandler): data handler
segments (dict): data split segments
seq_len (int): time series sequence length
horizon (int): label horizon (to mask historical loss for TRA)
num_states (int): how many memory states to be added (for TRA)
batch_size (int): batch size (<0 means daily batch)
shuffle (bool): whether shuffle data
pin_memory (bool): whether pin data to gpu memory
drop_last (bool): whether drop last batch < batch_size
"""
def __init__(
self,
handler,
segments,
seq_len=60,
horizon=0,
num_states=1,
batch_size=-1,
shuffle=True,
pin_memory=False,
drop_last=False,
**kwargs
):
assert horizon > 0, "please specify `horizon` to avoid data leakage"
self.seq_len = seq_len
self.horizon = horizon
self.num_states = num_states
self.batch_size = batch_size
self.shuffle = shuffle
self.drop_last = drop_last
self.pin_memory = pin_memory
self.params = (batch_size, drop_last, shuffle) # for train/eval switch
super().__init__(handler, segments, **kwargs)
def setup_data(self, handler_kwargs: dict = None, **kwargs):
super().setup_data()
# change index to <code, date>
# NOTE: we will use inplace sort to reduce memory use
df = self.handler._data
df.index = df.index.swaplevel()
df.sort_index(inplace=True)
self._data = df["feature"].values.astype("float32")
self._label = df["label"].squeeze().astype("float32")
self._index = df.index
# add memory to feature
self._data = np.c_[self._data, np.zeros((len(self._data), self.num_states), dtype=np.float32)]
# padding tensor
self.zeros = np.zeros((self.seq_len, self._data.shape[1]), dtype=np.float32)
# pin memory
if self.pin_memory:
self._data = _to_tensor(self._data)
self._label = _to_tensor(self._label)
self.zeros = _to_tensor(self.zeros)
# create batch slices
self.batch_slices = _create_ts_slices(self._index, self.seq_len)
# create daily slices
index = [slc.stop - 1 for slc in self.batch_slices]
act_index = self.restore_index(index)
daily_slices = {date: [] for date in sorted(act_index.unique(level=1))}
for i, (code, date) in enumerate(act_index):
daily_slices[date].append(self.batch_slices[i])
self.daily_slices = list(daily_slices.values())
def _prepare_seg(self, slc, **kwargs):
fn = _get_date_parse_fn(self._index[0][1])
start_date = fn(slc.start)
end_date = fn(slc.stop)
obj = copy.copy(self) # shallow copy
# NOTE: Seriable will disable copy `self._data` so we manually assign them here
obj._data = self._data
obj._label = self._label
obj._index = self._index
new_batch_slices = []
for batch_slc in self.batch_slices:
date = self._index[batch_slc.stop - 1][1]
if start_date <= date <= end_date:
new_batch_slices.append(batch_slc)
obj.batch_slices = np.array(new_batch_slices)
new_daily_slices = []
for daily_slc in self.daily_slices:
date = self._index[daily_slc[0].stop - 1][1]
if start_date <= date <= end_date:
new_daily_slices.append(daily_slc)
obj.daily_slices = new_daily_slices
return obj
def restore_index(self, index):
if isinstance(index, torch.Tensor):
index = index.cpu().numpy()
return self._index[index]
def assign_data(self, index, vals):
if isinstance(self._data, torch.Tensor):
vals = _to_tensor(vals)
elif isinstance(vals, torch.Tensor):
vals = vals.detach().cpu().numpy()
index = index.detach().cpu().numpy()
self._data[index, -self.num_states :] = vals
def clear_memory(self):
self._data[:, -self.num_states :] = 0
# TODO: better train/eval mode design
def train(self):
"""enable traning mode"""
self.batch_size, self.drop_last, self.shuffle = self.params
def eval(self):
"""enable evaluation mode"""
self.batch_size = -1
self.drop_last = False
self.shuffle = False
def _get_slices(self):
if self.batch_size < 0:
slices = self.daily_slices.copy()
batch_size = -1 * self.batch_size
else:
slices = self.batch_slices.copy()
batch_size = self.batch_size
return slices, batch_size
def __len__(self):
slices, batch_size = self._get_slices()
if self.drop_last:
return len(slices) // batch_size
return (len(slices) + batch_size - 1) // batch_size
def __iter__(self):
slices, batch_size = self._get_slices()
if self.shuffle:
np.random.shuffle(slices)
for i in range(len(slices))[::batch_size]:
if self.drop_last and i + batch_size > len(slices):
break
# get slices for this batch
slices_subset = slices[i : i + batch_size]
if self.batch_size < 0:
slices_subset = np.concatenate(slices_subset)
# collect data
data = []
label = []
index = []
for slc in slices_subset:
_data = self._data[slc].clone() if self.pin_memory else self._data[slc].copy()
if len(_data) != self.seq_len:
if self.pin_memory:
_data = torch.cat([self.zeros[: self.seq_len - len(_data)], _data], axis=0)
else:
_data = np.concatenate([self.zeros[: self.seq_len - len(_data)], _data], axis=0)
if self.num_states > 0:
_data[-self.horizon :, -self.num_states :] = 0
data.append(_data)
label.append(self._label[slc.stop - 1])
index.append(slc.stop - 1)
# concate
index = torch.tensor(index, device=device)
if isinstance(data[0], torch.Tensor):
data = torch.stack(data)
label = torch.stack(label)
else:
data = _to_tensor(np.stack(data))
label = _to_tensor(np.stack(label))
# yield -> generator
yield {"data": data, "label": label, "index": index}

View File

@@ -1,603 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import copy
import math
import json
import collections
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
from qlib.utils import get_or_create_path
from qlib.log import get_module_logger
from qlib.model.base import Model
device = "cuda" if torch.cuda.is_available() else "cpu"
class TRAModel(Model):
def __init__(
self,
model_config,
tra_config,
model_type="LSTM",
lr=1e-3,
n_epochs=500,
early_stop=50,
smooth_steps=5,
max_steps_per_epoch=None,
freeze_model=False,
model_init_state=None,
lamb=0.0,
rho=0.99,
seed=None,
logdir=None,
eval_train=True,
eval_test=False,
avg_params=True,
**kwargs,
):
np.random.seed(seed)
torch.manual_seed(seed)
self.logger = get_module_logger("TRA")
self.logger.info("TRA Model...")
self.model = eval(model_type)(**model_config).to(device)
if model_init_state:
self.model.load_state_dict(torch.load(model_init_state, map_location="cpu")["model"])
if freeze_model:
for param in self.model.parameters():
param.requires_grad_(False)
else:
self.logger.info("# model params: %d" % sum([p.numel() for p in self.model.parameters()]))
self.tra = TRA(self.model.output_size, **tra_config).to(device)
self.logger.info("# tra params: %d" % sum([p.numel() for p in self.tra.parameters()]))
self.optimizer = optim.Adam(list(self.model.parameters()) + list(self.tra.parameters()), lr=lr)
self.model_config = model_config
self.tra_config = tra_config
self.lr = lr
self.n_epochs = n_epochs
self.early_stop = early_stop
self.smooth_steps = smooth_steps
self.max_steps_per_epoch = max_steps_per_epoch
self.lamb = lamb
self.rho = rho
self.seed = seed
self.logdir = logdir
self.eval_train = eval_train
self.eval_test = eval_test
self.avg_params = avg_params
if self.tra.num_states > 1 and not self.eval_train:
self.logger.warn("`eval_train` will be ignored when using TRA")
if self.logdir is not None:
if os.path.exists(self.logdir):
self.logger.warn(f"logdir {self.logdir} is not empty")
os.makedirs(self.logdir, exist_ok=True)
self.fitted = False
self.global_step = -1
def train_epoch(self, data_set):
self.model.train()
self.tra.train()
data_set.train()
max_steps = self.n_epochs
if self.max_steps_per_epoch is not None:
max_steps = min(self.max_steps_per_epoch, self.n_epochs)
count = 0
total_loss = 0
total_count = 0
for batch in tqdm(data_set, total=max_steps):
count += 1
if count > max_steps:
break
self.global_step += 1
data, label, index = batch["data"], batch["label"], batch["index"]
feature = data[:, :, : -self.tra.num_states]
hist_loss = data[:, : -data_set.horizon, -self.tra.num_states :]
hidden = self.model(feature)
pred, all_preds, prob = self.tra(hidden, hist_loss)
loss = (pred - label).pow(2).mean()
L = (all_preds.detach() - label[:, None]).pow(2)
L -= L.min(dim=-1, keepdim=True).values # normalize & ensure postive input
data_set.assign_data(index, L) # save loss to memory
if prob is not None:
P = sinkhorn(-L, epsilon=0.01) # sample assignment matrix
lamb = self.lamb * (self.rho ** self.global_step)
reg = prob.log().mul(P).sum(dim=-1).mean()
loss = loss - lamb * reg
loss.backward()
self.optimizer.step()
self.optimizer.zero_grad()
total_loss += loss.item()
total_count += len(pred)
total_loss /= total_count
return total_loss
def test_epoch(self, data_set, return_pred=False):
self.model.eval()
self.tra.eval()
data_set.eval()
preds = []
metrics = []
for batch in tqdm(data_set):
data, label, index = batch["data"], batch["label"], batch["index"]
feature = data[:, :, : -self.tra.num_states]
hist_loss = data[:, : -data_set.horizon, -self.tra.num_states :]
with torch.no_grad():
hidden = self.model(feature)
pred, all_preds, prob = self.tra(hidden, hist_loss)
L = (all_preds - label[:, None]).pow(2)
L -= L.min(dim=-1, keepdim=True).values # normalize & ensure postive input
data_set.assign_data(index, L) # save loss to memory
X = np.c_[
pred.cpu().numpy(),
label.cpu().numpy(),
]
columns = ["score", "label"]
if prob is not None:
X = np.c_[X, all_preds.cpu().numpy(), prob.cpu().numpy()]
columns += ["score_%d" % d for d in range(all_preds.shape[1])] + [
"prob_%d" % d for d in range(all_preds.shape[1])
]
pred = pd.DataFrame(X, index=index.cpu().numpy(), columns=columns)
metrics.append(evaluate(pred))
if return_pred:
preds.append(pred)
metrics = pd.DataFrame(metrics)
metrics = {
"MSE": metrics.MSE.mean(),
"MAE": metrics.MAE.mean(),
"IC": metrics.IC.mean(),
"ICIR": metrics.IC.mean() / metrics.IC.std(),
}
if return_pred:
preds = pd.concat(preds, axis=0)
preds.index = data_set.restore_index(preds.index)
preds.index = preds.index.swaplevel()
preds.sort_index(inplace=True)
return metrics, preds
def fit(self, dataset, evals_result=dict()):
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
best_score = -1
best_epoch = 0
stop_rounds = 0
best_params = {
"model": copy.deepcopy(self.model.state_dict()),
"tra": copy.deepcopy(self.tra.state_dict()),
}
params_list = {
"model": collections.deque(maxlen=self.smooth_steps),
"tra": collections.deque(maxlen=self.smooth_steps),
}
evals_result["train"] = []
evals_result["valid"] = []
evals_result["test"] = []
# train
self.fitted = True
self.global_step = -1
if self.tra.num_states > 1:
self.logger.info("init memory...")
self.test_epoch(train_set)
for epoch in range(self.n_epochs):
self.logger.info("Epoch %d:", epoch)
self.logger.info("training...")
self.train_epoch(train_set)
self.logger.info("evaluating...")
# average params for inference
params_list["model"].append(copy.deepcopy(self.model.state_dict()))
params_list["tra"].append(copy.deepcopy(self.tra.state_dict()))
self.model.load_state_dict(average_params(params_list["model"]))
self.tra.load_state_dict(average_params(params_list["tra"]))
# NOTE: during evaluating, the whole memory will be refreshed
if self.tra.num_states > 1 or self.eval_train:
train_set.clear_memory() # NOTE: clear the shared memory
train_metrics = self.test_epoch(train_set)[0]
evals_result["train"].append(train_metrics)
self.logger.info("\ttrain metrics: %s" % train_metrics)
valid_metrics = self.test_epoch(valid_set)[0]
evals_result["valid"].append(valid_metrics)
self.logger.info("\tvalid metrics: %s" % valid_metrics)
if self.eval_test:
test_metrics = self.test_epoch(test_set)[0]
evals_result["test"].append(test_metrics)
self.logger.info("\ttest metrics: %s" % test_metrics)
if valid_metrics["IC"] > best_score:
best_score = valid_metrics["IC"]
stop_rounds = 0
best_epoch = epoch
best_params = {
"model": copy.deepcopy(self.model.state_dict()),
"tra": copy.deepcopy(self.tra.state_dict()),
}
else:
stop_rounds += 1
if stop_rounds >= self.early_stop:
self.logger.info("early stop @ %s" % epoch)
break
# restore parameters
self.model.load_state_dict(params_list["model"][-1])
self.tra.load_state_dict(params_list["tra"][-1])
self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
self.model.load_state_dict(best_params["model"])
self.tra.load_state_dict(best_params["tra"])
metrics, preds = self.test_epoch(test_set, return_pred=True)
self.logger.info("test metrics: %s" % metrics)
if self.logdir:
self.logger.info("save model & pred to local directory")
pd.concat({name: pd.DataFrame(evals_result[name]) for name in evals_result}, axis=1).to_csv(
self.logdir + "/logs.csv", index=False
)
torch.save(best_params, self.logdir + "/model.bin")
preds.to_pickle(self.logdir + "/pred.pkl")
info = {
"config": {
"model_config": self.model_config,
"tra_config": self.tra_config,
"lr": self.lr,
"n_epochs": self.n_epochs,
"early_stop": self.early_stop,
"smooth_steps": self.smooth_steps,
"max_steps_per_epoch": self.max_steps_per_epoch,
"lamb": self.lamb,
"rho": self.rho,
"seed": self.seed,
"logdir": self.logdir,
},
"best_eval_metric": -best_score, # NOTE: minux -1 for minimize
"metric": metrics,
}
with open(self.logdir + "/info.json", "w") as f:
json.dump(info, f)
def predict(self, dataset, segment="test"):
if not self.fitted:
raise ValueError("model is not fitted yet!")
test_set = dataset.prepare(segment)
metrics, preds = self.test_epoch(test_set, return_pred=True)
self.logger.info("test metrics: %s" % metrics)
return preds
class LSTM(nn.Module):
"""LSTM Model
Args:
input_size (int): input size (# features)
hidden_size (int): hidden size
num_layers (int): number of hidden layers
use_attn (bool): whether use attention layer.
we use concat attention as https://github.com/fulifeng/Adv-ALSTM/
dropout (float): dropout rate
input_drop (float): input dropout for data augmentation
noise_level (float): add gaussian noise to input for data augmentation
"""
def __init__(
self,
input_size=16,
hidden_size=64,
num_layers=2,
use_attn=True,
dropout=0.0,
input_drop=0.0,
noise_level=0.0,
*args,
**kwargs,
):
super().__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.use_attn = use_attn
self.noise_level = noise_level
self.input_drop = nn.Dropout(input_drop)
self.rnn = nn.LSTM(
input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
batch_first=True,
dropout=dropout,
)
if self.use_attn:
self.W = nn.Linear(hidden_size, hidden_size)
self.u = nn.Linear(hidden_size, 1, bias=False)
self.output_size = hidden_size * 2
else:
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
noise = torch.randn_like(x).to(x)
x = x + noise * self.noise_level
rnn_out, _ = self.rnn(x)
last_out = rnn_out[:, -1]
if self.use_attn:
laten = self.W(rnn_out).tanh()
scores = self.u(laten).softmax(dim=1)
att_out = (rnn_out * scores).sum(dim=1).squeeze()
last_out = torch.cat([last_out, att_out], dim=1)
return last_out
class PositionalEncoding(nn.Module):
# reference: https://pytorch.org/tutorials/beginner/transformer_tutorial.html
def __init__(self, d_model, dropout=0.1, max_len=5000):
super(PositionalEncoding, self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer("pe", pe)
def forward(self, x):
x = x + self.pe[: x.size(0), :]
return self.dropout(x)
class Transformer(nn.Module):
"""Transformer Model
Args:
input_size (int): input size (# features)
hidden_size (int): hidden size
num_layers (int): number of transformer layers
num_heads (int): number of heads in transformer
dropout (float): dropout rate
input_drop (float): input dropout for data augmentation
noise_level (float): add gaussian noise to input for data augmentation
"""
def __init__(
self,
input_size=16,
hidden_size=64,
num_layers=2,
num_heads=2,
dropout=0.0,
input_drop=0.0,
noise_level=0.0,
**kwargs,
):
super().__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.num_heads = num_heads
self.noise_level = noise_level
self.input_drop = nn.Dropout(input_drop)
self.input_proj = nn.Linear(input_size, hidden_size)
self.pe = PositionalEncoding(input_size, dropout)
layer = nn.TransformerEncoderLayer(
nhead=num_heads, dropout=dropout, d_model=hidden_size, dim_feedforward=hidden_size * 4
)
self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
noise = torch.randn_like(x).to(x)
x = x + noise * self.noise_level
x = x.permute(1, 0, 2).contiguous() # the first dim need to be sequence
x = self.pe(x)
x = self.input_proj(x)
out = self.encoder(x)
return out[-1]
class TRA(nn.Module):
"""Temporal Routing Adaptor (TRA)
TRA takes historical prediction erros & latent representation as inputs,
then routes the input sample to a specific predictor for training & inference.
Args:
input_size (int): input size (RNN/Transformer's hidden size)
num_states (int): number of latent states (i.e., trading patterns)
If `num_states=1`, then TRA falls back to traditional methods
hidden_size (int): hidden size of the router
tau (float): gumbel softmax temperature
"""
def __init__(self, input_size, num_states=1, hidden_size=8, tau=1.0, src_info="LR_TPE"):
super().__init__()
self.num_states = num_states
self.tau = tau
self.src_info = src_info
if num_states > 1:
self.router = nn.LSTM(
input_size=num_states,
hidden_size=hidden_size,
num_layers=1,
batch_first=True,
)
self.fc = nn.Linear(hidden_size + input_size, num_states)
self.predictors = nn.Linear(input_size, num_states)
def forward(self, hidden, hist_loss):
preds = self.predictors(hidden)
if self.num_states == 1:
return preds.squeeze(-1), preds, None
# information type
router_out, _ = self.router(hist_loss)
if "LR" in self.src_info:
latent_representation = hidden
else:
latent_representation = torch.randn(hidden.shape).to(hidden)
if "TPE" in self.src_info:
temporal_pred_error = router_out[:, -1]
else:
temporal_pred_error = torch.randn(router_out[:, -1].shape).to(hidden)
out = self.fc(torch.cat([temporal_pred_error, latent_representation], dim=-1))
prob = F.gumbel_softmax(out, dim=-1, tau=self.tau, hard=False)
if self.training:
final_pred = (preds * prob).sum(dim=-1)
else:
final_pred = preds[range(len(preds)), prob.argmax(dim=-1)]
return final_pred, preds, prob
def evaluate(pred):
pred = pred.rank(pct=True) # transform into percentiles
score = pred.score
label = pred.label
diff = score - label
MSE = (diff ** 2).mean()
MAE = (diff.abs()).mean()
IC = score.corr(label)
return {"MSE": MSE, "MAE": MAE, "IC": IC}
def average_params(params_list):
assert isinstance(params_list, (tuple, list, collections.deque))
n = len(params_list)
if n == 1:
return params_list[0]
new_params = collections.OrderedDict()
keys = None
for i, params in enumerate(params_list):
if keys is None:
keys = params.keys()
for k, v in params.items():
if k not in keys:
raise ValueError("the %d-th model has different params" % i)
if k not in new_params:
new_params[k] = v / n
else:
new_params[k] += v / n
return new_params
def shoot_infs(inp_tensor):
"""Replaces inf by maximum of tensor"""
mask_inf = torch.isinf(inp_tensor)
ind_inf = torch.nonzero(mask_inf, as_tuple=False)
if len(ind_inf) > 0:
for ind in ind_inf:
if len(ind) == 2:
inp_tensor[ind[0], ind[1]] = 0
elif len(ind) == 1:
inp_tensor[ind[0]] = 0
m = torch.max(inp_tensor)
for ind in ind_inf:
if len(ind) == 2:
inp_tensor[ind[0], ind[1]] = m
elif len(ind) == 1:
inp_tensor[ind[0]] = m
return inp_tensor
def sinkhorn(Q, n_iters=3, epsilon=0.01):
# epsilon should be adjusted according to logits value's scale
with torch.no_grad():
Q = shoot_infs(Q)
Q = torch.exp(Q / epsilon)
for i in range(n_iters):
Q /= Q.sum(dim=0, keepdim=True)
Q /= Q.sum(dim=1, keepdim=True)
return Q

View File

@@ -1,136 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: FilterCol
kwargs:
fields_group: feature
col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10",
"ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5",
"RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"]
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
num_states: &num_states 3
memory_mode: &memory_mode sample
tra_config: &tra_config
num_states: *num_states
rnn_arch: LSTM
hidden_size: 32
num_layers: 1
dropout: 0.0
tau: 1.0
src_info: LR_TPE
model_config: &model_config
input_size: 20
hidden_size: 64
num_layers: 2
rnn_arch: LSTM
use_attn: True
dropout: 0.0
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TRAModel
module_path: qlib.contrib.model.pytorch_tra
kwargs:
tra_config: *tra_config
model_config: *model_config
model_type: RNN
lr: 1e-3
n_epochs: 100
max_steps_per_epoch:
early_stop: 20
logdir: output/Alpha158
seed: 0
lamb: 1.0
rho: 0.99
alpha: 0.5
transport_method: router
memory_mode: *memory_mode
eval_train: False
eval_test: True
pretrain: True
init_state:
freeze_model: False
freeze_predictors: False
dataset:
class: MTSDatasetH
module_path: qlib.contrib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
seq_len: 60
horizon: 2
input_size:
num_states: *num_states
batch_size: 1024
n_samples:
memory_mode: *memory_mode
drop_last: True
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,130 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
num_states: &num_states 3
memory_mode: &memory_mode sample
tra_config: &tra_config
num_states: *num_states
rnn_arch: LSTM
hidden_size: 32
num_layers: 1
dropout: 0.0
tau: 1.0
src_info: LR_TPE
model_config: &model_config
input_size: 158
hidden_size: 256
num_layers: 2
rnn_arch: LSTM
use_attn: True
dropout: 0.2
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TRAModel
module_path: qlib.contrib.model.pytorch_tra
kwargs:
tra_config: *tra_config
model_config: *model_config
model_type: RNN
lr: 1e-3
n_epochs: 100
max_steps_per_epoch:
early_stop: 20
logdir: output/Alpha158_full
seed: 0
lamb: 1.0
rho: 0.99
alpha: 0.5
transport_method: router
memory_mode: *memory_mode
eval_train: False
eval_test: True
pretrain: True
init_state:
freeze_model: False
freeze_predictors: False
dataset:
class: MTSDatasetH
module_path: qlib.contrib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
seq_len: 60
horizon: 2
input_size:
num_states: *num_states
batch_size: 1024
n_samples:
memory_mode: *memory_mode
drop_last: True
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,130 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
num_states: &num_states 3
memory_mode: &memory_mode sample
tra_config: &tra_config
num_states: *num_states
rnn_arch: LSTM
hidden_size: 32
num_layers: 1
dropout: 0.0
tau: 1.0
src_info: LR_TPE
model_config: &model_config
input_size: 6
hidden_size: 64
num_layers: 2
rnn_arch: LSTM
use_attn: True
dropout: 0.0
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TRAModel
module_path: qlib.contrib.model.pytorch_tra
kwargs:
tra_config: *tra_config
model_config: *model_config
model_type: RNN
lr: 1e-3
n_epochs: 100
max_steps_per_epoch:
early_stop: 20
logdir: output/Alpha360
seed: 0
lamb: 1.0
rho: 0.99
alpha: 0.5
transport_method: router
memory_mode: *memory_mode
eval_train: False
eval_test: True
pretrain: True
init_state:
freeze_model: False
freeze_predictors: False
dataset:
class: MTSDatasetH
module_path: qlib.contrib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
seq_len: 60
horizon: 2
input_size: 6
num_states: *num_states
batch_size: 1024
n_samples:
memory_mode: *memory_mode
drop_last: True
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TabnetModel
@@ -51,7 +46,6 @@ task:
kwargs:
d_feat: 158
pretrain: True
seed: 993
dataset:
class: DatasetH
module_path: qlib.data.dataset
@@ -69,9 +63,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -26,24 +26,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TabnetModel
@@ -51,7 +46,6 @@ task:
kwargs:
d_feat: 360
pretrain: True
seed: 993
dataset:
class: DatasetH
module_path: qlib.data.dataset
@@ -69,9 +63,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -1,3 +0,0 @@
numpy==1.17.4
pandas==1.1.2
torch==1.2.0

View File

@@ -1,89 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: FilterCol
kwargs:
fields_group: feature
col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10",
"ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5",
"RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
]
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TransformerModel
module_path: qlib.contrib.model.pytorch_transformer_ts
kwargs:
seed: 0
n_jobs: 20
dataset:
class: TSDatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
step_len: 20
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -1,80 +0,0 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: TransformerModel
module_path: qlib.contrib.model.pytorch_transformer
kwargs:
d_feat: 6
seed: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -12,24 +12,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: XGBModel
@@ -57,9 +52,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

View File

@@ -19,24 +19,19 @@ data_handler_config: &data_handler_config
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
module_path: qlib.contrib.strategy.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: XGBModel
@@ -64,9 +59,7 @@ task:
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:

208
examples/data/monitor.py Normal file
View File

@@ -0,0 +1,208 @@
"""
This script is the demonstrating the implementation of Metric Extractor and Detector
NOTE: A lot of details is not considered in this script
- Corner case that will raise error( std == 0)
The following functions are used to demonstrate the following examples
· Metric Extractor:
case 1) Basic statistics on different slices of the DataFrame df:
1) The statistics include:
· STD, Mean, Skewnes, Kurtosis
2) The above statistics can be calculated on the following data slices:
· df.groupby(['datetime'])
· df.groupby(['datetime', 'industry' ])
3) The statistics could be calculated on the time dimension for each instruments and factor(the factor can be represented by experssion)
· <df implemented by expresion>.groupby(['instrument', 'factor'])
case 2) Advanced statistics on different slices of the DataFrame df:
1) Auto-correlation:
· Calculate corr(df.loc[t, :, :], df.loc[t-w, :, :]), w=1, 2, ….
2) Correlation between factors:
· For any pair of factors (i, j): calculate corr(df.loc[t, :, i], df.loc[t, :, j]). The result is a correlation matrix with each element corresponds to a correlation value between a pair of factors.
· Detector: detect the abnormality of the extracted metric;
a) Algorithms:
§ Basic checks: NaN.
§ Point anomaly detection.
§ Segment anomaly detection.
b) Scenarios:
§ Online anomaly detection: monitoring streaming data.
The usage of the detectors are demonstrated in the `case_1_*`and `case_2_*`
case 3): Examples to use MetricExt to monitor IC and rank IC
1) IC(Information Coefficient) #case_3_1
2) RankIC #case_3_2
"""
# AUTO download data
from typing import List, Union
from qlib.utils import exists_qlib_data
from qlib.tests.data import GetData
from qlib.config import REG_CN
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
import qlib
import pandas as pd
from qlib.contrib.data.handler import Alpha158
from qlib.data.dataset.loader import QlibDataLoader
from qlib.data.monitor.metric import format_conv
from qlib.data.monitor.metric import MeanM, SkewM, KurtM, StdM, AutoCM, CorrM
from qlib.data.monitor.detector import NDDetector, SWNDD, ThresholdD
from qlib.data import D
import fire
UNIVERSE = "csi300"
START_TIME = "20200101"
# ------------------ a helper function to get data to demonstrate the functionality --------------------
def get_data_df(col_idx: Union[int, List[int]] = 0, verbose: bool = True):
"""
a helper function to get data to demonstrate the functionality.
Parameters
----------
col_idx : Union[int, List[int]]
column index of the metrics
"""
dh = Alpha158(instruments=UNIVERSE, infer_processors=[], learn_processors=[], start_time=START_TIME)
df = dh.fetch()
if verbose:
print(df.head())
# We don't have industries in dataframe, we generate the with fake data
industry = pd.Series(df.index.get_level_values("instrument").str.slice(stop=2).to_list(), index=df.index)
# select a factor
factor_df = format_conv(df.iloc[:, col_idx], industry=industry)
if verbose:
print(f"Selected metric: {df.columns[col_idx]}")
print(factor_df)
return factor_df
def get_target(horizon=5):
target = f"Ref($close, -{horizon + 1})/Ref($close, -1) - 1" # There are lots of targets: return is one of them
qdl = QlibDataLoader(config=([target], ["target"]))
df = qdl.load(instruments=UNIVERSE, start_time=START_TIME) # Aligning with factor will improve performance
df = format_conv(df["target"])
return df
# ----------------- Cases to demonstrate the usage of detector and examples ----------------------
def case_1_1():
factor_df = get_data_df()
# 1) Extract metrics
# 1.1) df.groupby(["datetime"])
mtrc = MeanM()
m_mean = mtrc.extract(factor_df)
print(m_mean)
ndd = NDDetector()
ndd.fit(m_mean) # use historical data to fit detector
check_res = ndd.check(m_mean)
print(check_res) # detecting on new data or historical data
print(check_res.value_counts())
def case_1_2():
factor_df = get_data_df()
# 1.2) df.groupby("datetime", "industry")
mtrc = MeanM(group=["industry"])
m_multi = mtrc.extract(factor_df)
print(m_multi)
for col_name, s in m_multi.iteritems():
print(col_name)
ndd = NDDetector()
ndd.fit(s) # use historical data to fit detector
check_res = ndd.check(s)
print(check_res) # detecting on new data or historical data
print(check_res.value_counts())
def case_1_3():
# case 1.3
# factor_df = get_data_df()
qdl = QlibDataLoader(config=(["$close/Ref($close, 1) - 1"], ["return"]))
df = qdl.load(instruments=["SH600519"], start_time=START_TIME)
df = format_conv(df)
s = df.iloc[:, 0]
print(s)
dtc = SWNDD(window=20)
dtc.fit(s) # fit use historical data (TODO: updating will be supported in the future)
check_res = dtc.check(s) #
print(check_res)
print(check_res.value_counts())
print(check_res[check_res])
def case_2_1():
# · Calculate corr(df.loc[t, :, :], df.loc[t-w, :, :]), w=1, 2, ….
factor_df = get_data_df()
acm = AutoCM()
mtrc = acm.extract(factor_df)
print(mtrc)
thd = ThresholdD(0.0, reverse=True)
check_res = thd.check(mtrc)
print(check_res)
print(check_res.value_counts())
def case_2_2():
factor_df1, factor_df2 = get_data_df(0), get_data_df(1)
cm = CorrM()
mtrc = cm.extract(factor_df1, factor_df2)
print(mtrc)
thd = ThresholdD(0.0, reverse=True)
check_res = thd.check(mtrc)
print(check_res)
print(check_res.value_counts())
def case_3_1_3_2():
target, factor = get_target(), get_data_df(0)
ic_m, rank_ic_m = CorrM(), CorrM(mode="spearman")
ic, rank_ic = ic_m.extract(factor, target), rank_ic_m.extract(factor, target)
print(pd.DataFrame({"ic": ic, "rank_ic": rank_ic}))
def run(test_list=["case_1_1", "case_1_2", "case_1_3", "case_2_1", "case_2_2", "case_3_1_3_2"]):
"""
run the specific tests
python monitor.py case_3_1_3_2
Parameters
----------
test_list : str[]
The tests to run
"""
if isinstance(test_list, str):
test_list = [test_list]
for fn in test_list:
globals()[fn]()
if __name__ == "__main__":
qlib.init()
fire.Fire(run)

Some files were not shown because too many files have changed in this diff Show More